[cig-commits] r19622 - in seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER: . DATA EXAMPLES/global_PREM_kernels/amplitude/DATA EXAMPLES/global_PREM_kernels/traveltime/DATA EXAMPLES/global_s362ani/DATA EXAMPLES/regional_Greece_small/DATA EXAMPLES/regional_MiddleEast/DATA setup src src/auxiliaries src/cuda src/meshfem3D src/shared src/specfem3D
danielpeter at geodynamics.org
danielpeter at geodynamics.org
Tue Feb 14 07:11:09 PST 2012
Author: danielpeter
Date: 2012-02-14 07:11:07 -0800 (Tue, 14 Feb 2012)
New Revision: 19622
Added:
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/assemble_MPI_scalar_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/assemble_MPI_vector_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/check_fields_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_add_sources_acoustic_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_add_sources_elastic_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_coupling_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_crust_mantle_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_inner_core_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_outer_core_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_kernels_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/noise_tomography_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_constants_cuda.h
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/save_and_compare_cpu_vs_gpu.c
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/transfer_fields_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/write_seismograms_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_acoustic.F90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/finalize_simulation.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/iterate_time.F90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_topography_bathymetry.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/setup_GLL_points.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_movie_output.f90
Modified:
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/DATA/Par_file
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/global_PREM_kernels/amplitude/DATA/Par_file
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/global_PREM_kernels/traveltime/DATA/Par_file
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/global_s362ani/DATA/Par_file
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/regional_Greece_small/DATA/Par_file
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/regional_MiddleEast/DATA/Par_file
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/configure
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/configure.ac
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/flags.guess
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/setup/constants.h.in
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/auxiliaries/create_movie_AVS_DX.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/calc_jacobian.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/model_attenuation.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/model_s362ani.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/broadcast_compute_parameters.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/exit_mpi.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/memory_eval.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/read_parameter_file.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/save_header_file.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/Makefile.in
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_central_cube.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_central_cube_block.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_scalar.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_scalar_block.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_vector.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/check_simulation_stability.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_add_sources.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_arrays_source.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_element.F90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle_Dev.F90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core_Dev.F90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core_Dev.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_kernels.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_seismograms.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_crust_mantle.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_outer_core.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/fix_non_blocking_flags.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/initialize_simulation.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_receivers.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_arrays_buffers_solver.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_forward_arrays.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_mesh_databases.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/save_forward_arrays.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D.F90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_movie_volume.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_seismograms.f90
Log:
adds modules and cuda routines
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/DATA/Par_file
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/DATA/Par_file 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/DATA/Par_file 2012-02-14 15:11:07 UTC (rev 19622)
@@ -5,23 +5,23 @@
SAVE_FORWARD = .false. # save last frame of forward simulation or not
# number of chunks (1,2,3 or 6)
-NCHUNKS = 6
+NCHUNKS = 1
# angular width of the first chunk (not used if full sphere with six chunks)
-ANGULAR_WIDTH_XI_IN_DEGREES = 90.d0 # angular size of a chunk
-ANGULAR_WIDTH_ETA_IN_DEGREES = 90.d0
+ANGULAR_WIDTH_XI_IN_DEGREES = 20.d0 # angular size of a chunk
+ANGULAR_WIDTH_ETA_IN_DEGREES = 20.d0
CENTER_LATITUDE_IN_DEGREES = 40.d0
-CENTER_LONGITUDE_IN_DEGREES = 10.d0
-GAMMA_ROTATION_AZIMUTH = 20.d0
+CENTER_LONGITUDE_IN_DEGREES = 25.d0
+GAMMA_ROTATION_AZIMUTH = 0.d0
# number of elements at the surface along the two sides of the first chunk
# (must be multiple of 16 and 8 * multiple of NPROC below)
-NEX_XI = 240
-NEX_ETA = 240
+NEX_XI = 64
+NEX_ETA = 64
# number of MPI processors along the two sides of the first chunk
-NPROC_XI = 5
-NPROC_ETA = 5
+NPROC_XI = 2
+NPROC_ETA = 2
# 1D models with real structure:
# 1D_isotropic_prem, 1D_transversely_isotropic_prem, 1D_iasp91, 1D_1066a, 1D_ak135, 1D_ref, 1D_ref_iso, 1D_jp3d,1D_sea99
@@ -49,10 +49,10 @@
ATTENUATION = .true.
# absorbing boundary conditions for a regional simulation
-ABSORBING_CONDITIONS = .false.
+ABSORBING_CONDITIONS = .true.
# record length in minutes
-RECORD_LENGTH_IN_MINUTES = 15.0d0
+RECORD_LENGTH_IN_MINUTES = 2.5d0
# save AVS or OpenDX movies
#MOVIE_COARSE saves movie only at corners of elements (SURFACE OR VOLUME)
@@ -90,7 +90,7 @@
LOCAL_PATH = ./DATABASES_MPI
# interval at which we output time step info and max of norm of displacement
-NTSTEP_BETWEEN_OUTPUT_INFO = 1000
+NTSTEP_BETWEEN_OUTPUT_INFO = 100
# interval in time steps for temporary writing of seismograms
NTSTEP_BETWEEN_OUTPUT_SEISMOS = 5000000
@@ -118,3 +118,5 @@
# print source time function
PRINT_SOURCE_TIME_FUNCTION = .false.
+# set to true to use GPUs
+GPU_MODE = .false.
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/global_PREM_kernels/amplitude/DATA/Par_file
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/global_PREM_kernels/amplitude/DATA/Par_file 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/global_PREM_kernels/amplitude/DATA/Par_file 2012-02-14 15:11:07 UTC (rev 19622)
@@ -118,3 +118,5 @@
# print source time function
PRINT_SOURCE_TIME_FUNCTION = .false.
+# set to true to use GPUs
+GPU_MODE = .false.
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/global_PREM_kernels/traveltime/DATA/Par_file
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/global_PREM_kernels/traveltime/DATA/Par_file 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/global_PREM_kernels/traveltime/DATA/Par_file 2012-02-14 15:11:07 UTC (rev 19622)
@@ -118,3 +118,5 @@
# print source time function
PRINT_SOURCE_TIME_FUNCTION = .false.
+# set to true to use GPUs
+GPU_MODE = .false.
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/global_s362ani/DATA/Par_file
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/global_s362ani/DATA/Par_file 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/global_s362ani/DATA/Par_file 2012-02-14 15:11:07 UTC (rev 19622)
@@ -118,3 +118,5 @@
# print source time function
PRINT_SOURCE_TIME_FUNCTION = .false.
+# set to true to use GPUs
+GPU_MODE = .false.
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/regional_Greece_small/DATA/Par_file
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/regional_Greece_small/DATA/Par_file 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/regional_Greece_small/DATA/Par_file 2012-02-14 15:11:07 UTC (rev 19622)
@@ -112,3 +112,6 @@
# print source time function
PRINT_SOURCE_TIME_FUNCTION = .false.
+# set to true to use GPUs
+GPU_MODE = .false.
+
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/regional_MiddleEast/DATA/Par_file
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/regional_MiddleEast/DATA/Par_file 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/regional_MiddleEast/DATA/Par_file 2012-02-14 15:11:07 UTC (rev 19622)
@@ -112,3 +112,5 @@
# print source time function
PRINT_SOURCE_TIME_FUNCTION = .false.
+# set to true to use GPUs
+GPU_MODE = .false.
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/configure
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/configure 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/configure 2012-02-14 15:11:07 UTC (rev 19622)
@@ -1,24 +1,20 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.67 for Specfem3D Globe 5.1.2.
+# Generated by GNU Autoconf 2.63 for Specfem3D Globe 5.1.2.
#
# Report bugs to <jtromp AT princeton.edu>.
#
-#
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
-# 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software
-# Foundation, Inc.
-#
-#
+# 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
# This configure script is free software; the Free Software Foundation
# gives unlimited permission to copy, distribute and modify it.
-## -------------------- ##
-## M4sh Initialization. ##
-## -------------------- ##
+## --------------------- ##
+## M4sh Initialization. ##
+## --------------------- ##
# Be more Bourne compatible
DUALCASE=1; export DUALCASE # for MKS sh
-if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then :
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
emulate sh
NULLCMD=:
# Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
@@ -26,15 +22,23 @@
alias -g '${1+"$@"}'='"$@"'
setopt NO_GLOB_SUBST
else
- case `(set -o) 2>/dev/null` in #(
- *posix*) :
- set -o posix ;; #(
- *) :
- ;;
+ case `(set -o) 2>/dev/null` in
+ *posix*) set -o posix ;;
esac
+
fi
+
+
+# PATH needs CR
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
as_nl='
'
export as_nl
@@ -42,13 +46,7 @@
as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo
as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo
-# Prefer a ksh shell builtin over an external printf program on Solaris,
-# but without wasting forks for bash or zsh.
-if test -z "$BASH_VERSION$ZSH_VERSION" \
- && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then
- as_echo='print -r --'
- as_echo_n='print -rn --'
-elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then
+if (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then
as_echo='printf %s\n'
as_echo_n='printf %s'
else
@@ -59,7 +57,7 @@
as_echo_body='eval expr "X$1" : "X\\(.*\\)"'
as_echo_n_body='eval
arg=$1;
- case $arg in #(
+ case $arg in
*"$as_nl"*)
expr "X$arg" : "X\\(.*\\)$as_nl";
arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;;
@@ -82,7 +80,14 @@
}
fi
+# Support unset when possible.
+if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then
+ as_unset=unset
+else
+ as_unset=false
+fi
+
# IFS
# We need space, tab and new line, in precisely that order. Quoting is
# there to prevent editors from complaining about space-tab.
@@ -91,15 +96,15 @@
IFS=" "" $as_nl"
# Find who we are. Look in the path if we contain no directory separator.
-case $0 in #((
+case $0 in
*[\\/]* ) as_myself=$0 ;;
*) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
- done
+ test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+done
IFS=$as_save_IFS
;;
@@ -111,16 +116,12 @@
fi
if test ! -f "$as_myself"; then
$as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2
- exit 1
+ { (exit 1); exit 1; }
fi
-# Unset variables that we do not need and which cause bugs (e.g. in
-# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1"
-# suppresses any "Segmentation fault" message there. '((' could
-# trigger a bug in pdksh 5.2.14.
-for as_var in BASH_ENV ENV MAIL MAILPATH
-do eval test x\${$as_var+set} = xset \
- && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || :
+# Work around bugs in pre-3.0 UWIN ksh.
+for as_var in ENV MAIL MAILPATH
+do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var
done
PS1='$ '
PS2='> '
@@ -132,300 +133,330 @@
LANGUAGE=C
export LANGUAGE
+# Required to use basename.
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+ test "X`expr 00001 : '.*\(...\)'`" = X001; then
+ as_expr=expr
+else
+ as_expr=false
+fi
+
+if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
+ as_basename=basename
+else
+ as_basename=false
+fi
+
+
+# Name of the executable.
+as_me=`$as_basename -- "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+ X"$0" : 'X\(//\)$' \| \
+ X"$0" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X/"$0" |
+ sed '/^.*\/\([^/][^/]*\)\/*$/{
+ s//\1/
+ q
+ }
+ /^X\/\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\/\(\/\).*/{
+ s//\1/
+ q
+ }
+ s/.*/./; q'`
+
# CDPATH.
-(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+$as_unset CDPATH
+
if test "x$CONFIG_SHELL" = x; then
- as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then :
- emulate sh
- NULLCMD=:
- # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which
- # is contrary to our usage. Disable this feature.
- alias -g '\${1+\"\$@\"}'='\"\$@\"'
- setopt NO_GLOB_SUBST
+ if (eval ":") 2>/dev/null; then
+ as_have_required=yes
else
- case \`(set -o) 2>/dev/null\` in #(
- *posix*) :
- set -o posix ;; #(
- *) :
- ;;
-esac
+ as_have_required=no
fi
-"
- as_required="as_fn_return () { (exit \$1); }
-as_fn_success () { as_fn_return 0; }
-as_fn_failure () { as_fn_return 1; }
-as_fn_ret_success () { return 0; }
-as_fn_ret_failure () { return 1; }
+ if test $as_have_required = yes && (eval ":
+(as_func_return () {
+ (exit \$1)
+}
+as_func_success () {
+ as_func_return 0
+}
+as_func_failure () {
+ as_func_return 1
+}
+as_func_ret_success () {
+ return 0
+}
+as_func_ret_failure () {
+ return 1
+}
+
exitcode=0
-as_fn_success || { exitcode=1; echo as_fn_success failed.; }
-as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; }
-as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; }
-as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; }
-if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then :
+if as_func_success; then
+ :
+else
+ exitcode=1
+ echo as_func_success failed.
+fi
+if as_func_failure; then
+ exitcode=1
+ echo as_func_failure succeeded.
+fi
+
+if as_func_ret_success; then
+ :
else
- exitcode=1; echo positional parameters were not saved.
+ exitcode=1
+ echo as_func_ret_success failed.
fi
-test x\$exitcode = x0 || exit 1"
- as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO
- as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO
- eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" &&
- test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1
-test \$(( 1 + 1 )) = 2 || exit 1"
- if (eval "$as_required") 2>/dev/null; then :
- as_have_required=yes
+
+if as_func_ret_failure; then
+ exitcode=1
+ echo as_func_ret_failure succeeded.
+fi
+
+if ( set x; as_func_ret_success y && test x = \"\$1\" ); then
+ :
else
- as_have_required=no
+ exitcode=1
+ echo positional parameters were not saved.
fi
- if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then :
+test \$exitcode = 0) || { (exit 1); exit 1; }
+
+(
+ as_lineno_1=\$LINENO
+ as_lineno_2=\$LINENO
+ test \"x\$as_lineno_1\" != \"x\$as_lineno_2\" &&
+ test \"x\`expr \$as_lineno_1 + 1\`\" = \"x\$as_lineno_2\") || { (exit 1); exit 1; }
+") 2> /dev/null; then
+ :
else
- as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-as_found=false
+ as_candidate_shells=
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- as_found=:
- case $as_dir in #(
+ case $as_dir in
/*)
for as_base in sh bash ksh sh5; do
- # Try only shells that exist, to save several forks.
- as_shell=$as_dir/$as_base
- if { test -f "$as_shell" || test -f "$as_shell.exe"; } &&
- { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then :
- CONFIG_SHELL=$as_shell as_have_required=yes
- if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then :
- break 2
-fi
-fi
+ as_candidate_shells="$as_candidate_shells $as_dir/$as_base"
done;;
esac
- as_found=false
done
-$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } &&
- { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then :
- CONFIG_SHELL=$SHELL as_have_required=yes
-fi; }
IFS=$as_save_IFS
- if test "x$CONFIG_SHELL" != x; then :
- # We cannot yet assume a decent shell, so we have to provide a
- # neutralization value for shells without unset; and this also
- # works around shells that cannot unset nonexistent variables.
- BASH_ENV=/dev/null
- ENV=/dev/null
- (unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
- export CONFIG_SHELL
- exec "$CONFIG_SHELL" "$as_myself" ${1+"$@"}
+ for as_shell in $as_candidate_shells $SHELL; do
+ # Try only shells that exist, to save several forks.
+ if { test -f "$as_shell" || test -f "$as_shell.exe"; } &&
+ { ("$as_shell") 2> /dev/null <<\_ASEOF
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
+ emulate sh
+ NULLCMD=:
+ # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
+ # is contrary to our usage. Disable this feature.
+ alias -g '${1+"$@"}'='"$@"'
+ setopt NO_GLOB_SUBST
+else
+ case `(set -o) 2>/dev/null` in
+ *posix*) set -o posix ;;
+esac
+
fi
- if test x$as_have_required = xno; then :
- $as_echo "$0: This script requires a shell more modern than all"
- $as_echo "$0: the shells that I found on your system."
- if test x${ZSH_VERSION+set} = xset ; then
- $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should"
- $as_echo "$0: be upgraded to zsh 4.3.4 or later."
- else
- $as_echo "$0: Please tell bug-autoconf at gnu.org and jtromp AT
-$0: princeton.edu about your system, including any error
-$0: possibly output before this message. Then install a
-$0: modern shell, or manually run the script under such a
-$0: shell if you do have one."
- fi
- exit 1
+
+:
+_ASEOF
+}; then
+ CONFIG_SHELL=$as_shell
+ as_have_required=yes
+ if { "$as_shell" 2> /dev/null <<\_ASEOF
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
+ emulate sh
+ NULLCMD=:
+ # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
+ # is contrary to our usage. Disable this feature.
+ alias -g '${1+"$@"}'='"$@"'
+ setopt NO_GLOB_SUBST
+else
+ case `(set -o) 2>/dev/null` in
+ *posix*) set -o posix ;;
+esac
+
fi
+
+
+:
+(as_func_return () {
+ (exit $1)
+}
+as_func_success () {
+ as_func_return 0
+}
+as_func_failure () {
+ as_func_return 1
+}
+as_func_ret_success () {
+ return 0
+}
+as_func_ret_failure () {
+ return 1
+}
+
+exitcode=0
+if as_func_success; then
+ :
+else
+ exitcode=1
+ echo as_func_success failed.
fi
+
+if as_func_failure; then
+ exitcode=1
+ echo as_func_failure succeeded.
fi
-SHELL=${CONFIG_SHELL-/bin/sh}
-export SHELL
-# Unset more variables known to interfere with behavior of common tools.
-CLICOLOR_FORCE= GREP_OPTIONS=
-unset CLICOLOR_FORCE GREP_OPTIONS
-## --------------------- ##
-## M4sh Shell Functions. ##
-## --------------------- ##
-# as_fn_unset VAR
-# ---------------
-# Portably unset VAR.
-as_fn_unset ()
-{
- { eval $1=; unset $1;}
-}
-as_unset=as_fn_unset
+if as_func_ret_success; then
+ :
+else
+ exitcode=1
+ echo as_func_ret_success failed.
+fi
-# as_fn_set_status STATUS
-# -----------------------
-# Set $? to STATUS, without forking.
-as_fn_set_status ()
-{
- return $1
-} # as_fn_set_status
+if as_func_ret_failure; then
+ exitcode=1
+ echo as_func_ret_failure succeeded.
+fi
-# as_fn_exit STATUS
-# -----------------
-# Exit the shell with STATUS, even in a "trap 0" or "set -e" context.
-as_fn_exit ()
-{
- set +e
- as_fn_set_status $1
- exit $1
-} # as_fn_exit
+if ( set x; as_func_ret_success y && test x = "$1" ); then
+ :
+else
+ exitcode=1
+ echo positional parameters were not saved.
+fi
-# as_fn_mkdir_p
-# -------------
-# Create "$as_dir" as a directory, including parents if necessary.
-as_fn_mkdir_p ()
-{
+test $exitcode = 0) || { (exit 1); exit 1; }
- case $as_dir in #(
- -*) as_dir=./$as_dir;;
- esac
- test -d "$as_dir" || eval $as_mkdir_p || {
- as_dirs=
- while :; do
- case $as_dir in #(
- *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'(
- *) as_qdir=$as_dir;;
- esac
- as_dirs="'$as_qdir' $as_dirs"
- as_dir=`$as_dirname -- "$as_dir" ||
-$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$as_dir" : 'X\(//\)[^/]' \| \
- X"$as_dir" : 'X\(//\)$' \| \
- X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
-$as_echo X"$as_dir" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
- s//\1/
- q
- }
- /^X\(\/\/\)[^/].*/{
- s//\1/
- q
- }
- /^X\(\/\/\)$/{
- s//\1/
- q
- }
- /^X\(\/\).*/{
- s//\1/
- q
- }
- s/.*/./; q'`
- test -d "$as_dir" && break
- done
- test -z "$as_dirs" || eval "mkdir $as_dirs"
- } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir"
+(
+ as_lineno_1=$LINENO
+ as_lineno_2=$LINENO
+ test "x$as_lineno_1" != "x$as_lineno_2" &&
+ test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2") || { (exit 1); exit 1; }
+_ASEOF
+}; then
+ break
+fi
-} # as_fn_mkdir_p
-# as_fn_append VAR VALUE
-# ----------------------
-# Append the text in VALUE to the end of the definition contained in VAR. Take
-# advantage of any shell optimizations that allow amortized linear growth over
-# repeated appends, instead of the typical quadratic growth present in naive
-# implementations.
-if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then :
- eval 'as_fn_append ()
- {
- eval $1+=\$2
- }'
-else
- as_fn_append ()
- {
- eval $1=\$$1\$2
- }
-fi # as_fn_append
+fi
-# as_fn_arith ARG...
-# ------------------
-# Perform arithmetic evaluation on the ARGs, and store the result in the
-# global $as_val. Take advantage of shells that can avoid forks. The arguments
-# must be portable across $(()) and expr.
-if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then :
- eval 'as_fn_arith ()
- {
- as_val=$(( $* ))
- }'
-else
- as_fn_arith ()
- {
- as_val=`expr "$@" || test $? -eq 1`
- }
-fi # as_fn_arith
+ done
+ if test "x$CONFIG_SHELL" != x; then
+ for as_var in BASH_ENV ENV
+ do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var
+ done
+ export CONFIG_SHELL
+ exec "$CONFIG_SHELL" "$as_myself" ${1+"$@"}
+fi
-# as_fn_error STATUS ERROR [LINENO LOG_FD]
-# ----------------------------------------
-# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
-# provided, also output the error to LOG_FD, referencing LINENO. Then exit the
-# script with STATUS, using 1 if that was 0.
-as_fn_error ()
-{
- as_status=$1; test $as_status -eq 0 && as_status=1
- if test "$4"; then
- as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4
- fi
- $as_echo "$as_me: error: $2" >&2
- as_fn_exit $as_status
-} # as_fn_error
-if expr a : '\(a\)' >/dev/null 2>&1 &&
- test "X`expr 00001 : '.*\(...\)'`" = X001; then
- as_expr=expr
+ if test $as_have_required = no; then
+ echo This script requires a shell more modern than all the
+ echo shells that I found on your system. Please install a
+ echo modern shell, or manually run the script under such a
+ echo shell if you do have one.
+ { (exit 1); exit 1; }
+fi
+
+
+fi
+
+fi
+
+
+
+(eval "as_func_return () {
+ (exit \$1)
+}
+as_func_success () {
+ as_func_return 0
+}
+as_func_failure () {
+ as_func_return 1
+}
+as_func_ret_success () {
+ return 0
+}
+as_func_ret_failure () {
+ return 1
+}
+
+exitcode=0
+if as_func_success; then
+ :
else
- as_expr=false
+ exitcode=1
+ echo as_func_success failed.
fi
-if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
- as_basename=basename
+if as_func_failure; then
+ exitcode=1
+ echo as_func_failure succeeded.
+fi
+
+if as_func_ret_success; then
+ :
else
- as_basename=false
+ exitcode=1
+ echo as_func_ret_success failed.
fi
-if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
- as_dirname=dirname
+if as_func_ret_failure; then
+ exitcode=1
+ echo as_func_ret_failure succeeded.
+fi
+
+if ( set x; as_func_ret_success y && test x = \"\$1\" ); then
+ :
else
- as_dirname=false
+ exitcode=1
+ echo positional parameters were not saved.
fi
-as_me=`$as_basename -- "$0" ||
-$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
- X"$0" : 'X\(//\)$' \| \
- X"$0" : 'X\(/\)' \| . 2>/dev/null ||
-$as_echo X/"$0" |
- sed '/^.*\/\([^/][^/]*\)\/*$/{
- s//\1/
- q
- }
- /^X\/\(\/\/\)$/{
- s//\1/
- q
- }
- /^X\/\(\/\).*/{
- s//\1/
- q
- }
- s/.*/./; q'`
+test \$exitcode = 0") || {
+ echo No shell found that supports shell functions.
+ echo Please tell bug-autoconf at gnu.org about your system,
+ echo including any error possibly output before this message.
+ echo This can help us improve future autoconf versions.
+ echo Configuration will now proceed without shell functions.
+}
-# Avoid depending upon Character Ranges.
-as_cr_letters='abcdefghijklmnopqrstuvwxyz'
-as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
-as_cr_Letters=$as_cr_letters$as_cr_LETTERS
-as_cr_digits='0123456789'
-as_cr_alnum=$as_cr_Letters$as_cr_digits
- as_lineno_1=$LINENO as_lineno_1a=$LINENO
- as_lineno_2=$LINENO as_lineno_2a=$LINENO
- eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" &&
- test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || {
- # Blame Lee E. McMahon (1931-1989) for sed's syntax. :-)
+ as_lineno_1=$LINENO
+ as_lineno_2=$LINENO
+ test "x$as_lineno_1" != "x$as_lineno_2" &&
+ test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || {
+
+ # Create $as_me.lineno as a copy of $as_myself, but with $LINENO
+ # uniformly replaced by the line number. The first 'sed' inserts a
+ # line-number line after each line using $LINENO; the second 'sed'
+ # does the real work. The second script uses 'N' to pair each
+ # line-number line with the line containing $LINENO, and appends
+ # trailing '-' during substitution so that $LINENO is not a special
+ # case at line end.
+ # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the
+ # scripts with optimization help from Paolo Bonzini. Blame Lee
+ # E. McMahon (1931-1989) for sed's syntax. :-)
sed -n '
p
/[$]LINENO/=
@@ -442,7 +473,8 @@
s/-\n.*//
' >$as_me.lineno &&
chmod +x "$as_me.lineno" ||
- { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; }
+ { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2
+ { (exit 1); exit 1; }; }
# Don't try to exec as it changes $[0], causing all sort of problems
# (the dirname of $[0] is not the place where we might find the
@@ -452,18 +484,29 @@
exit
}
+
+if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
+ as_dirname=dirname
+else
+ as_dirname=false
+fi
+
ECHO_C= ECHO_N= ECHO_T=
-case `echo -n x` in #(((((
+case `echo -n x` in
-n*)
- case `echo 'xy\c'` in
+ case `echo 'x\c'` in
*c*) ECHO_T=' ';; # ECHO_T is single tab character.
- xy) ECHO_C='\c';;
- *) echo `echo ksh88 bug on AIX 6.1` > /dev/null
- ECHO_T=' ';;
+ *) ECHO_C='\c';;
esac;;
*)
ECHO_N='-n';;
esac
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+ test "X`expr 00001 : '.*\(...\)'`" = X001; then
+ as_expr=expr
+else
+ as_expr=false
+fi
rm -f conf$$ conf$$.exe conf$$.file
if test -d conf$$.dir; then
@@ -493,7 +536,7 @@
rmdir conf$$.dir 2>/dev/null
if mkdir -p . 2>/dev/null; then
- as_mkdir_p='mkdir -p "$as_dir"'
+ as_mkdir_p=:
else
test -d ./-p && rmdir ./-p
as_mkdir_p=false
@@ -512,10 +555,10 @@
if test -d "$1"; then
test -d "$1/.";
else
- case $1 in #(
+ case $1 in
-*)set "./$1";;
esac;
- case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in #((
+ case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in
???[sx]*):;;*)false;;esac;fi
'\'' sh
'
@@ -529,11 +572,11 @@
as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
-test -n "$DJDIR" || exec 7<&0 </dev/null
-exec 6>&1
+exec 7<&0 </dev/null 6>&1
+
# Name of the host.
-# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status,
+# hostname on some systems (SVR3.2, Linux) returns a bogus exit status,
# so uname gets run too.
ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q`
@@ -548,6 +591,7 @@
subdirs=
MFLAGS=
MAKEFLAGS=
+SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='Specfem3D Globe'
@@ -555,7 +599,6 @@
PACKAGE_VERSION='5.1.2'
PACKAGE_STRING='Specfem3D Globe 5.1.2'
PACKAGE_BUGREPORT='jtromp AT princeton.edu'
-PACKAGE_URL=''
ac_unique_file="src/specfem3D/specfem3D.F90"
# Factoring default headers for most tests.
@@ -597,13 +640,16 @@
ac_subst_vars='LTLIBOBJS
LIBOBJS
LOCAL_PATH_IS_ALSO_GLOBAL
+RANLIB
+ARFLAGS
+AR
+MPI_INC
+CUDA_INC
+CUDA_LIB
FLAGS_NO_CHECK
FLAGS_CHECK
MPILIBS
MPIFC
-AR
-ARFLAGS
-RANLIB
EGREP
GREP
CPP
@@ -620,6 +666,8 @@
LDFLAGS
FCFLAGS
FC
+COND_CUDA_FALSE
+COND_CUDA_TRUE
CUSTOM_MPI_TYPE
CUSTOM_REAL
target_alias
@@ -652,7 +700,6 @@
program_transform_name
prefix
exec_prefix
-PACKAGE_URL
PACKAGE_BUGREPORT
PACKAGE_STRING
PACKAGE_VERSION
@@ -664,6 +711,7 @@
ac_user_opts='
enable_option_checking
enable_double_precision
+with_cuda
'
ac_precious_vars='build_alias
host_alias
@@ -680,6 +728,12 @@
MPILIBS
FLAGS_CHECK
FLAGS_NO_CHECK
+CUDA_LIB
+CUDA_INC
+MPI_INC
+AR
+ARFLAGS
+RANLIB
LOCAL_PATH_IS_ALSO_GLOBAL'
@@ -743,9 +797,8 @@
fi
case $ac_option in
- *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;;
- *=) ac_optarg= ;;
- *) ac_optarg=yes ;;
+ *=*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;;
+ *) ac_optarg=yes ;;
esac
# Accept the important Cygnus configure options, so we can diagnose typos.
@@ -790,7 +843,8 @@
ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'`
# Reject names that are not valid shell variable names.
expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
- as_fn_error $? "invalid feature name: $ac_useropt"
+ { $as_echo "$as_me: error: invalid feature name: $ac_useropt" >&2
+ { (exit 1); exit 1; }; }
ac_useropt_orig=$ac_useropt
ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
case $ac_user_opts in
@@ -816,7 +870,8 @@
ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'`
# Reject names that are not valid shell variable names.
expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
- as_fn_error $? "invalid feature name: $ac_useropt"
+ { $as_echo "$as_me: error: invalid feature name: $ac_useropt" >&2
+ { (exit 1); exit 1; }; }
ac_useropt_orig=$ac_useropt
ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
case $ac_user_opts in
@@ -1020,7 +1075,8 @@
ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'`
# Reject names that are not valid shell variable names.
expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
- as_fn_error $? "invalid package name: $ac_useropt"
+ { $as_echo "$as_me: error: invalid package name: $ac_useropt" >&2
+ { (exit 1); exit 1; }; }
ac_useropt_orig=$ac_useropt
ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
case $ac_user_opts in
@@ -1036,7 +1092,8 @@
ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'`
# Reject names that are not valid shell variable names.
expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
- as_fn_error $? "invalid package name: $ac_useropt"
+ { $as_echo "$as_me: error: invalid package name: $ac_useropt" >&2
+ { (exit 1); exit 1; }; }
ac_useropt_orig=$ac_useropt
ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
case $ac_user_opts in
@@ -1066,17 +1123,17 @@
| --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
x_libraries=$ac_optarg ;;
- -*) as_fn_error $? "unrecognized option: \`$ac_option'
-Try \`$0 --help' for more information"
+ -*) { $as_echo "$as_me: error: unrecognized option: $ac_option
+Try \`$0 --help' for more information." >&2
+ { (exit 1); exit 1; }; }
;;
*=*)
ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='`
# Reject names that are not valid shell variable names.
- case $ac_envvar in #(
- '' | [0-9]* | *[!_$as_cr_alnum]* )
- as_fn_error $? "invalid variable name: \`$ac_envvar'" ;;
- esac
+ expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null &&
+ { $as_echo "$as_me: error: invalid variable name: $ac_envvar" >&2
+ { (exit 1); exit 1; }; }
eval $ac_envvar=\$ac_optarg
export $ac_envvar ;;
@@ -1093,13 +1150,15 @@
if test -n "$ac_prev"; then
ac_option=--`echo $ac_prev | sed 's/_/-/g'`
- as_fn_error $? "missing argument to $ac_option"
+ { $as_echo "$as_me: error: missing argument to $ac_option" >&2
+ { (exit 1); exit 1; }; }
fi
if test -n "$ac_unrecognized_opts"; then
case $enable_option_checking in
no) ;;
- fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;;
+ fatal) { $as_echo "$as_me: error: unrecognized options: $ac_unrecognized_opts" >&2
+ { (exit 1); exit 1; }; } ;;
*) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;;
esac
fi
@@ -1122,7 +1181,8 @@
[\\/$]* | ?:[\\/]* ) continue;;
NONE | '' ) case $ac_var in *prefix ) continue;; esac;;
esac
- as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val"
+ { $as_echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2
+ { (exit 1); exit 1; }; }
done
# There might be people who depend on the old broken behavior: `$host'
@@ -1136,8 +1196,8 @@
if test "x$host_alias" != x; then
if test "x$build_alias" = x; then
cross_compiling=maybe
- $as_echo "$as_me: WARNING: if you wanted to set the --build type, don't use --host.
- If a cross compiler is detected then cross compile mode will be used" >&2
+ $as_echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host.
+ If a cross compiler is detected then cross compile mode will be used." >&2
elif test "x$build_alias" != "x$host_alias"; then
cross_compiling=yes
fi
@@ -1152,9 +1212,11 @@
ac_pwd=`pwd` && test -n "$ac_pwd" &&
ac_ls_di=`ls -di .` &&
ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` ||
- as_fn_error $? "working directory cannot be determined"
+ { $as_echo "$as_me: error: working directory cannot be determined" >&2
+ { (exit 1); exit 1; }; }
test "X$ac_ls_di" = "X$ac_pwd_ls_di" ||
- as_fn_error $? "pwd does not report name of working directory"
+ { $as_echo "$as_me: error: pwd does not report name of working directory" >&2
+ { (exit 1); exit 1; }; }
# Find the source files, if location was not specified.
@@ -1193,11 +1255,13 @@
fi
if test ! -r "$srcdir/$ac_unique_file"; then
test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .."
- as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir"
+ { $as_echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2
+ { (exit 1); exit 1; }; }
fi
ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work"
ac_abs_confdir=`(
- cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg"
+ cd "$srcdir" && test -r "./$ac_unique_file" || { $as_echo "$as_me: error: $ac_msg" >&2
+ { (exit 1); exit 1; }; }
pwd)`
# When building in place, set srcdir=.
if test "$ac_abs_confdir" = "$ac_pwd"; then
@@ -1237,7 +1301,7 @@
--help=short display options specific to this package
--help=recursive display the short help of all the included packages
-V, --version display version information and exit
- -q, --quiet, --silent do not print \`checking ...' messages
+ -q, --quiet, --silent do not print \`checking...' messages
--cache-file=FILE cache test results in FILE [disabled]
-C, --config-cache alias for \`--cache-file=config.cache'
-n, --no-create do not create output files
@@ -1295,6 +1359,11 @@
--enable-double-precision
solver in double precision [default=no]
+Optional Packages:
+ --with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
+ --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no)
+ --with-cuda build cuda GPU enabled version [default=no]
+
Some influential environment variables:
FC Fortran compiler command
FCFLAGS Fortran compiler flags
@@ -1303,7 +1372,7 @@
LIBS libraries to pass to the linker, e.g. -l<library>
CC C compiler command
CFLAGS C compiler flags
- CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I<include dir> if
+ CPPFLAGS C/C++/Objective C preprocessor flags, e.g. -I<include dir> if
you have headers in a nonstandard directory <include dir>
CPP C preprocessor
MPIFC MPI Fortran compiler command
@@ -1312,6 +1381,13 @@
FLAGS_NO_CHECK
Fortran compiler flags for creating fast, production-run code
for critical subroutines
+ CUDA_LIB Location of CUDA library libcudart
+ CUDA_INC Location of CUDA include files
+ MPI_INC Location of MPI include mpi.h, which is needed by nvcc when
+ compiling cuda files
+ AR ar library creation
+ ARFLAGS ar flag library creation
+ RANLIB ranlib library creation
LOCAL_PATH_IS_ALSO_GLOBAL
files on a local path on each node are also seen as global with
same path [default=true]
@@ -1383,347 +1459,21 @@
if $ac_init_version; then
cat <<\_ACEOF
Specfem3D Globe configure 5.1.2
-generated by GNU Autoconf 2.67
+generated by GNU Autoconf 2.63
-Copyright (C) 2010 Free Software Foundation, Inc.
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
+2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
This configure script is free software; the Free Software Foundation
gives unlimited permission to copy, distribute and modify it.
_ACEOF
exit
fi
-
-## ------------------------ ##
-## Autoconf initialization. ##
-## ------------------------ ##
-
-# ac_fn_fc_try_compile LINENO
-# ---------------------------
-# Try to compile conftest.$ac_ext, and return whether this succeeded.
-ac_fn_fc_try_compile ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- rm -f conftest.$ac_objext
- if { { ac_try="$ac_compile"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_compile") 2>conftest.err
- ac_status=$?
- if test -s conftest.err; then
- grep -v '^ *+' conftest.err >conftest.er1
- cat conftest.er1 >&5
- mv -f conftest.er1 conftest.err
- fi
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; } && {
- test -z "$ac_fc_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then :
- ac_retval=0
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- ac_retval=1
-fi
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
- as_fn_set_status $ac_retval
-
-} # ac_fn_fc_try_compile
-
-# ac_fn_c_try_compile LINENO
-# --------------------------
-# Try to compile conftest.$ac_ext, and return whether this succeeded.
-ac_fn_c_try_compile ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- rm -f conftest.$ac_objext
- if { { ac_try="$ac_compile"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_compile") 2>conftest.err
- ac_status=$?
- if test -s conftest.err; then
- grep -v '^ *+' conftest.err >conftest.er1
- cat conftest.er1 >&5
- mv -f conftest.er1 conftest.err
- fi
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then :
- ac_retval=0
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- ac_retval=1
-fi
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
- as_fn_set_status $ac_retval
-
-} # ac_fn_c_try_compile
-
-# ac_fn_c_try_link LINENO
-# -----------------------
-# Try to link conftest.$ac_ext, and return whether this succeeded.
-ac_fn_c_try_link ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- rm -f conftest.$ac_objext conftest$ac_exeext
- if { { ac_try="$ac_link"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_link") 2>conftest.err
- ac_status=$?
- if test -s conftest.err; then
- grep -v '^ *+' conftest.err >conftest.er1
- cat conftest.er1 >&5
- mv -f conftest.er1 conftest.err
- fi
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext && {
- test "$cross_compiling" = yes ||
- $as_test_x conftest$ac_exeext
- }; then :
- ac_retval=0
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- ac_retval=1
-fi
- # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information
- # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would
- # interfere with the next link command; also delete a directory that is
- # left behind by Apple's compiler. We do this before executing the actions.
- rm -rf conftest.dSYM conftest_ipa8_conftest.oo
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
- as_fn_set_status $ac_retval
-
-} # ac_fn_c_try_link
-
-# ac_fn_c_try_cpp LINENO
-# ----------------------
-# Try to preprocess conftest.$ac_ext, and return whether this succeeded.
-ac_fn_c_try_cpp ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- if { { ac_try="$ac_cpp conftest.$ac_ext"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err
- ac_status=$?
- if test -s conftest.err; then
- grep -v '^ *+' conftest.err >conftest.er1
- cat conftest.er1 >&5
- mv -f conftest.er1 conftest.err
- fi
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; } > conftest.i && {
- test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
- test ! -s conftest.err
- }; then :
- ac_retval=0
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- ac_retval=1
-fi
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
- as_fn_set_status $ac_retval
-
-} # ac_fn_c_try_cpp
-
-# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES
-# -------------------------------------------------------
-# Tests whether HEADER exists, giving a warning if it cannot be compiled using
-# the include files in INCLUDES and setting the cache variable VAR
-# accordingly.
-ac_fn_c_check_header_mongrel ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- if eval "test \"\${$3+set}\"" = set; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
-$as_echo_n "checking for $2... " >&6; }
-if eval "test \"\${$3+set}\"" = set; then :
- $as_echo_n "(cached) " >&6
-fi
-eval ac_res=\$$3
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
-$as_echo "$ac_res" >&6; }
-else
- # Is the header compilable?
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5
-$as_echo_n "checking $2 usability... " >&6; }
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-$4
-#include <$2>
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
- ac_header_compiler=yes
-else
- ac_header_compiler=no
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5
-$as_echo "$ac_header_compiler" >&6; }
-
-# Is the header present?
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5
-$as_echo_n "checking $2 presence... " >&6; }
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <$2>
-_ACEOF
-if ac_fn_c_try_cpp "$LINENO"; then :
- ac_header_preproc=yes
-else
- ac_header_preproc=no
-fi
-rm -f conftest.err conftest.i conftest.$ac_ext
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
-$as_echo "$ac_header_preproc" >&6; }
-
-# So? What about this header?
-case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #((
- yes:no: )
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5
-$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;}
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
-$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
- ;;
- no:yes:* )
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5
-$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;}
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5
-$as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" >&2;}
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5
-$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;}
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5
-$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;}
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
-$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
-( $as_echo "## -------------------------------------- ##
-## Report this to jtromp AT princeton.edu ##
-## -------------------------------------- ##"
- ) | sed "s/^/$as_me: WARNING: /" >&2
- ;;
-esac
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
-$as_echo_n "checking for $2... " >&6; }
-if eval "test \"\${$3+set}\"" = set; then :
- $as_echo_n "(cached) " >&6
-else
- eval "$3=\$ac_header_compiler"
-fi
-eval ac_res=\$$3
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
-$as_echo "$ac_res" >&6; }
-fi
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
-
-} # ac_fn_c_check_header_mongrel
-
-# ac_fn_c_try_run LINENO
-# ----------------------
-# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes
-# that executables *can* be run.
-ac_fn_c_try_run ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- if { { ac_try="$ac_link"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_link") 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; } && { ac_try='./conftest$ac_exeext'
- { { case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_try") 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; }; then :
- ac_retval=0
-else
- $as_echo "$as_me: program exited with status $ac_status" >&5
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- ac_retval=$ac_status
-fi
- rm -rf conftest.dSYM conftest_ipa8_conftest.oo
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
- as_fn_set_status $ac_retval
-
-} # ac_fn_c_try_run
-
-# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES
-# -------------------------------------------------------
-# Tests whether HEADER exists and can be compiled using the include files in
-# INCLUDES, setting the cache variable VAR accordingly.
-ac_fn_c_check_header_compile ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
-$as_echo_n "checking for $2... " >&6; }
-if eval "test \"\${$3+set}\"" = set; then :
- $as_echo_n "(cached) " >&6
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-$4
-#include <$2>
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
- eval "$3=yes"
-else
- eval "$3=no"
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-fi
-eval ac_res=\$$3
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
-$as_echo "$ac_res" >&6; }
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
-
-} # ac_fn_c_check_header_compile
cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by Specfem3D Globe $as_me 5.1.2, which was
-generated by GNU Autoconf 2.67. Invocation command line was
+generated by GNU Autoconf 2.63. Invocation command line was
$ $0 $@
@@ -1759,8 +1509,8 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- $as_echo "PATH: $as_dir"
- done
+ $as_echo "PATH: $as_dir"
+done
IFS=$as_save_IFS
} >&5
@@ -1797,9 +1547,9 @@
ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;;
esac
case $ac_pass in
- 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;;
+ 1) ac_configure_args0="$ac_configure_args0 '$ac_arg'" ;;
2)
- as_fn_append ac_configure_args1 " '$ac_arg'"
+ ac_configure_args1="$ac_configure_args1 '$ac_arg'"
if test $ac_must_keep_next = true; then
ac_must_keep_next=false # Got value, back to normal.
else
@@ -1815,13 +1565,13 @@
-* ) ac_must_keep_next=true ;;
esac
fi
- as_fn_append ac_configure_args " '$ac_arg'"
+ ac_configure_args="$ac_configure_args '$ac_arg'"
;;
esac
done
done
-{ ac_configure_args0=; unset ac_configure_args0;}
-{ ac_configure_args1=; unset ac_configure_args1;}
+$as_unset ac_configure_args0 || test "${ac_configure_args0+set}" != set || { ac_configure_args0=; export ac_configure_args0; }
+$as_unset ac_configure_args1 || test "${ac_configure_args1+set}" != set || { ac_configure_args1=; export ac_configure_args1; }
# When interrupted or exit'd, cleanup temporary files, and complete
# config.log. We remove comments because anyway the quotes in there
@@ -1833,9 +1583,11 @@
{
echo
- $as_echo "## ---------------- ##
+ cat <<\_ASBOX
+## ---------------- ##
## Cache variables. ##
-## ---------------- ##"
+## ---------------- ##
+_ASBOX
echo
# The following way of writing the cache mishandles newlines in values,
(
@@ -1844,13 +1596,13 @@
case $ac_val in #(
*${as_nl}*)
case $ac_var in #(
- *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5
+ *_cv_*) { $as_echo "$as_me:$LINENO: WARNING: cache variable $ac_var contains a newline" >&5
$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;;
esac
case $ac_var in #(
_ | IFS | as_nl) ;; #(
BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #(
- *) { eval $ac_var=; unset $ac_var;} ;;
+ *) $as_unset $ac_var ;;
esac ;;
esac
done
@@ -1869,9 +1621,11 @@
)
echo
- $as_echo "## ----------------- ##
+ cat <<\_ASBOX
+## ----------------- ##
## Output variables. ##
-## ----------------- ##"
+## ----------------- ##
+_ASBOX
echo
for ac_var in $ac_subst_vars
do
@@ -1884,9 +1638,11 @@
echo
if test -n "$ac_subst_files"; then
- $as_echo "## ------------------- ##
+ cat <<\_ASBOX
+## ------------------- ##
## File substitutions. ##
-## ------------------- ##"
+## ------------------- ##
+_ASBOX
echo
for ac_var in $ac_subst_files
do
@@ -1900,9 +1656,11 @@
fi
if test -s confdefs.h; then
- $as_echo "## ----------- ##
+ cat <<\_ASBOX
+## ----------- ##
## confdefs.h. ##
-## ----------- ##"
+## ----------- ##
+_ASBOX
echo
cat confdefs.h
echo
@@ -1916,53 +1674,46 @@
exit $exit_status
' 0
for ac_signal in 1 2 13 15; do
- trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal
+ trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal
done
ac_signal=0
# confdefs.h avoids OS command line length limits that DEFS can exceed.
rm -f -r conftest* confdefs.h
-$as_echo "/* confdefs.h */" > confdefs.h
-
# Predefined preprocessor variables.
cat >>confdefs.h <<_ACEOF
#define PACKAGE_NAME "$PACKAGE_NAME"
_ACEOF
+
cat >>confdefs.h <<_ACEOF
#define PACKAGE_TARNAME "$PACKAGE_TARNAME"
_ACEOF
+
cat >>confdefs.h <<_ACEOF
#define PACKAGE_VERSION "$PACKAGE_VERSION"
_ACEOF
+
cat >>confdefs.h <<_ACEOF
#define PACKAGE_STRING "$PACKAGE_STRING"
_ACEOF
+
cat >>confdefs.h <<_ACEOF
#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT"
_ACEOF
-cat >>confdefs.h <<_ACEOF
-#define PACKAGE_URL "$PACKAGE_URL"
-_ACEOF
-
# Let the site file select an alternate cache file if it wants to.
# Prefer an explicitly selected file to automatically selected ones.
ac_site_file1=NONE
ac_site_file2=NONE
if test -n "$CONFIG_SITE"; then
- # We do not want a PATH search for config.site.
- case $CONFIG_SITE in #((
- -*) ac_site_file1=./$CONFIG_SITE;;
- */*) ac_site_file1=$CONFIG_SITE;;
- *) ac_site_file1=./$CONFIG_SITE;;
- esac
+ ac_site_file1=$CONFIG_SITE
elif test "x$prefix" != xNONE; then
ac_site_file1=$prefix/share/config.site
ac_site_file2=$prefix/etc/config.site
@@ -1973,23 +1724,19 @@
for ac_site_file in "$ac_site_file1" "$ac_site_file2"
do
test "x$ac_site_file" = xNONE && continue
- if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5
+ if test -r "$ac_site_file"; then
+ { $as_echo "$as_me:$LINENO: loading site script $ac_site_file" >&5
$as_echo "$as_me: loading site script $ac_site_file" >&6;}
sed 's/^/| /' "$ac_site_file" >&5
- . "$ac_site_file" \
- || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "failed to load site script $ac_site_file
-See \`config.log' for more details" "$LINENO" 5 ; }
+ . "$ac_site_file"
fi
done
if test -r "$cache_file"; then
- # Some versions of bash will fail to source /dev/null (special files
- # actually), so we avoid doing that. DJGPP emulates it as a regular file.
- if test /dev/null != "$cache_file" && test -f "$cache_file"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5
+ # Some versions of bash will fail to source /dev/null (special
+ # files actually), so we avoid doing that.
+ if test -f "$cache_file"; then
+ { $as_echo "$as_me:$LINENO: loading cache $cache_file" >&5
$as_echo "$as_me: loading cache $cache_file" >&6;}
case $cache_file in
[\\/]* | ?:[\\/]* ) . "$cache_file";;
@@ -1997,7 +1744,7 @@
esac
fi
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5
+ { $as_echo "$as_me:$LINENO: creating cache $cache_file" >&5
$as_echo "$as_me: creating cache $cache_file" >&6;}
>$cache_file
fi
@@ -2012,11 +1759,11 @@
eval ac_new_val=\$ac_env_${ac_var}_value
case $ac_old_set,$ac_new_set in
set,)
- { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5
+ { $as_echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5
$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;}
ac_cache_corrupted=: ;;
,set)
- { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5
+ { $as_echo "$as_me:$LINENO: error: \`$ac_var' was not set in the previous run" >&5
$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;}
ac_cache_corrupted=: ;;
,);;
@@ -2026,17 +1773,17 @@
ac_old_val_w=`echo x $ac_old_val`
ac_new_val_w=`echo x $ac_new_val`
if test "$ac_old_val_w" != "$ac_new_val_w"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5
+ { $as_echo "$as_me:$LINENO: error: \`$ac_var' has changed since the previous run:" >&5
$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;}
ac_cache_corrupted=:
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5
+ { $as_echo "$as_me:$LINENO: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5
$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;}
eval $ac_var=\$ac_old_val
fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5
+ { $as_echo "$as_me:$LINENO: former value: \`$ac_old_val'" >&5
$as_echo "$as_me: former value: \`$ac_old_val'" >&2;}
- { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5
+ { $as_echo "$as_me:$LINENO: current value: \`$ac_new_val'" >&5
$as_echo "$as_me: current value: \`$ac_new_val'" >&2;}
fi;;
esac
@@ -2048,21 +1795,44 @@
esac
case " $ac_configure_args " in
*" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy.
- *) as_fn_append ac_configure_args " '$ac_arg'" ;;
+ *) ac_configure_args="$ac_configure_args '$ac_arg'" ;;
esac
fi
done
if $ac_cache_corrupted; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+ { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
- { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5
+ { $as_echo "$as_me:$LINENO: error: changes in the environment can compromise the build" >&5
$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;}
- as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&5
+$as_echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&2;}
+ { (exit 1); exit 1; }; }
fi
-## -------------------- ##
-## Main body of script. ##
-## -------------------- ##
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
@@ -2079,7 +1849,7 @@
# 'configure' options
# Check whether --enable-double-precision was given.
-if test "${enable_double_precision+set}" = set; then :
+if test "${enable_double_precision+set}" = set; then
enableval=$enable_double_precision; want_double_precision="$enableval"
else
want_double_precision=no
@@ -2096,6 +1866,23 @@
+# Check whether --with-cuda was given.
+if test "${with_cuda+set}" = set; then
+ withval=$with_cuda; want_cuda="$withval"
+else
+ want_cuda=no
+fi
+
+ if test "$want_cuda" = yes; then
+ COND_CUDA_TRUE=
+ COND_CUDA_FALSE='#'
+else
+ COND_CUDA_TRUE='#'
+ COND_CUDA_FALSE=
+fi
+
+
+
# Checks for programs.
# a courtesy to the installed base of users
@@ -2111,13 +1898,13 @@
ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_fc_compiler_gnu
if test -n "$ac_tool_prefix"; then
- for ac_prog in gfortran g95 xlf95 f95 fort ifort ifc efc pgfortran pgf95 lf95 ftn xlf90 f90 pgf90 pghpf epcf90 g77 xlf f77 frt pgf77 cf77 fort77 fl32 af77
+ for ac_prog in gfortran g95 xlf95 f95 fort ifort ifc efc pgf95 lf95 ftn xlf90 f90 pgf90 pghpf epcf90 g77 xlf f77 frt pgf77 cf77 fort77 fl32 af77
do
# Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
set dummy $ac_tool_prefix$ac_prog; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_FC+set}" = set; then :
+if test "${ac_cv_prog_FC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$FC"; then
@@ -2128,24 +1915,24 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_FC="$ac_tool_prefix$ac_prog"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
fi
fi
FC=$ac_cv_prog_FC
if test -n "$FC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $FC" >&5
+ { $as_echo "$as_me:$LINENO: result: $FC" >&5
$as_echo "$FC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -2155,13 +1942,13 @@
fi
if test -z "$FC"; then
ac_ct_FC=$FC
- for ac_prog in gfortran g95 xlf95 f95 fort ifort ifc efc pgfortran pgf95 lf95 ftn xlf90 f90 pgf90 pghpf epcf90 g77 xlf f77 frt pgf77 cf77 fort77 fl32 af77
+ for ac_prog in gfortran g95 xlf95 f95 fort ifort ifc efc pgf95 lf95 ftn xlf90 f90 pgf90 pghpf epcf90 g77 xlf f77 frt pgf77 cf77 fort77 fl32 af77
do
# Extract the first word of "$ac_prog", so it can be a program name with args.
set dummy $ac_prog; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_FC+set}" = set; then :
+if test "${ac_cv_prog_ac_ct_FC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_FC"; then
@@ -2172,24 +1959,24 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_ac_ct_FC="$ac_prog"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
fi
fi
ac_ct_FC=$ac_cv_prog_ac_ct_FC
if test -n "$ac_ct_FC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_FC" >&5
+ { $as_echo "$as_me:$LINENO: result: $ac_ct_FC" >&5
$as_echo "$ac_ct_FC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -2202,7 +1989,7 @@
else
case $cross_compiling:$ac_tool_warned in
yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+{ $as_echo "$as_me:$LINENO: WARNING: using cross tools not prefixed with host triplet" >&5
$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
ac_tool_warned=yes ;;
esac
@@ -2212,32 +1999,45 @@
# Provide some information about the compiler.
-$as_echo "$as_me:${as_lineno-$LINENO}: checking for Fortran compiler version" >&5
+$as_echo "$as_me:$LINENO: checking for Fortran compiler version" >&5
set X $ac_compile
ac_compiler=$2
-for ac_option in --version -v -V -qversion; do
- { { ac_try="$ac_compiler $ac_option >&5"
+{ (ac_try="$ac_compiler --version >&5"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_compiler $ac_option >&5") 2>conftest.err
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compiler --version >&5") 2>&5
ac_status=$?
- if test -s conftest.err; then
- sed '10a\
-... rest of stderr output deleted ...
- 10q' conftest.err >conftest.er1
- cat conftest.er1 >&5
- fi
- rm -f conftest.er1 conftest.err
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }
-done
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+{ (ac_try="$ac_compiler -v >&5"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compiler -v >&5") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+{ (ac_try="$ac_compiler -V >&5"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compiler -V >&5") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
rm -f a.out
-cat > conftest.$ac_ext <<_ACEOF
+cat >conftest.$ac_ext <<_ACEOF
program main
end
@@ -2247,8 +2047,8 @@
# Try to create an executable without -o first, disregard a.out.
# It will help us diagnose broken compilers, and finding out an intuition
# of exeext.
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the Fortran compiler works" >&5
-$as_echo_n "checking whether the Fortran compiler works... " >&6; }
+{ $as_echo "$as_me:$LINENO: checking for Fortran compiler default output file name" >&5
+$as_echo_n "checking for Fortran compiler default output file name... " >&6; }
ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'`
# The possible output files:
@@ -2264,17 +2064,17 @@
done
rm -f $ac_rmfiles
-if { { ac_try="$ac_link_default"
+if { (ac_try="$ac_link_default"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
(eval "$ac_link_default") 2>&5
ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; then :
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; then
# Autoconf-2.13 could set the ac_cv_exeext variable to `no'.
# So ignore a value of `no', otherwise this would lead to `EXEEXT = no'
# in a Makefile. We should not override ac_cv_exeext if it was cached,
@@ -2291,7 +2091,7 @@
# certainly right.
break;;
*.* )
- if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no;
+ if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no;
then :; else
ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
fi
@@ -2310,41 +2110,84 @@
else
ac_file=''
fi
-if test -z "$ac_file"; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-$as_echo "$as_me: failed program was:" >&5
+
+{ $as_echo "$as_me:$LINENO: result: $ac_file" >&5
+$as_echo "$ac_file" >&6; }
+if test -z "$ac_file"; then
+ $as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
-{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+{ { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error 77 "Fortran compiler cannot create executables
-See \`config.log' for more details" "$LINENO" 5 ; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
+{ { $as_echo "$as_me:$LINENO: error: Fortran compiler cannot create executables
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: Fortran compiler cannot create executables
+See \`config.log' for more details." >&2;}
+ { (exit 77); exit 77; }; }; }
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for Fortran compiler default output file name" >&5
-$as_echo_n "checking for Fortran compiler default output file name... " >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5
-$as_echo "$ac_file" >&6; }
+
ac_exeext=$ac_cv_exeext
+# Check that the compiler produces executables we can run. If not, either
+# the compiler is broken, or we cross compile.
+{ $as_echo "$as_me:$LINENO: checking whether the Fortran compiler works" >&5
+$as_echo_n "checking whether the Fortran compiler works... " >&6; }
+# FIXME: These cross compiler hacks should be removed for Autoconf 3.0
+# If not cross compiling, check that we can run a simple program.
+if test "$cross_compiling" != yes; then
+ if { ac_try='./$ac_file'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ cross_compiling=no
+ else
+ if test "$cross_compiling" = maybe; then
+ cross_compiling=yes
+ else
+ { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+{ { $as_echo "$as_me:$LINENO: error: cannot run Fortran compiled programs.
+If you meant to cross compile, use \`--host'.
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: cannot run Fortran compiled programs.
+If you meant to cross compile, use \`--host'.
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }; }
+ fi
+ fi
+fi
+{ $as_echo "$as_me:$LINENO: result: yes" >&5
+$as_echo "yes" >&6; }
+
rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out
ac_clean_files=$ac_clean_files_save
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5
+# Check that the compiler produces executables we can run. If not, either
+# the compiler is broken, or we cross compile.
+{ $as_echo "$as_me:$LINENO: checking whether we are cross compiling" >&5
+$as_echo_n "checking whether we are cross compiling... " >&6; }
+{ $as_echo "$as_me:$LINENO: result: $cross_compiling" >&5
+$as_echo "$cross_compiling" >&6; }
+
+{ $as_echo "$as_me:$LINENO: checking for suffix of executables" >&5
$as_echo_n "checking for suffix of executables... " >&6; }
-if { { ac_try="$ac_link"
+if { (ac_try="$ac_link"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
(eval "$ac_link") 2>&5
ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; then :
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; then
# If both `conftest.exe' and `conftest' are `present' (well, observable)
# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will
# work properly (i.e., refer to `conftest.exe'), while it won't with
@@ -2359,93 +2202,44 @@
esac
done
else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+ { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "cannot compute suffix of executables: cannot compile and link
-See \`config.log' for more details" "$LINENO" 5 ; }
+{ { $as_echo "$as_me:$LINENO: error: cannot compute suffix of executables: cannot compile and link
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: cannot compute suffix of executables: cannot compile and link
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }; }
fi
-rm -f conftest conftest$ac_cv_exeext
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5
+
+rm -f conftest$ac_cv_exeext
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5
$as_echo "$ac_cv_exeext" >&6; }
rm -f conftest.$ac_ext
EXEEXT=$ac_cv_exeext
ac_exeext=$EXEEXT
-cat > conftest.$ac_ext <<_ACEOF
- program main
- open(unit=9,file='conftest.out')
- close(unit=9)
-
- end
-_ACEOF
-ac_clean_files="$ac_clean_files conftest.out"
-# Check that the compiler produces executables we can run. If not, either
-# the compiler is broken, or we cross compile.
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5
-$as_echo_n "checking whether we are cross compiling... " >&6; }
-if test "$cross_compiling" != yes; then
- { { ac_try="$ac_link"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_link") 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }
- if { ac_try='./conftest$ac_cv_exeext'
- { { case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_try") 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; }; then
- cross_compiling=no
- else
- if test "$cross_compiling" = maybe; then
- cross_compiling=yes
- else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "cannot run Fortran compiled programs.
-If you meant to cross compile, use \`--host'.
-See \`config.log' for more details" "$LINENO" 5 ; }
- fi
- fi
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5
-$as_echo "$cross_compiling" >&6; }
-
-rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out
-ac_clean_files=$ac_clean_files_save
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5
+{ $as_echo "$as_me:$LINENO: checking for suffix of object files" >&5
$as_echo_n "checking for suffix of object files... " >&6; }
-if test "${ac_cv_objext+set}" = set; then :
+if test "${ac_cv_objext+set}" = set; then
$as_echo_n "(cached) " >&6
else
- cat > conftest.$ac_ext <<_ACEOF
+ cat >conftest.$ac_ext <<_ACEOF
program main
end
_ACEOF
rm -f conftest.o conftest.obj
-if { { ac_try="$ac_compile"
+if { (ac_try="$ac_compile"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
(eval "$ac_compile") 2>&5
ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; then :
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; then
for ac_file in conftest.o conftest.obj conftest.*; do
test -f "$ac_file" || continue;
case $ac_file in
@@ -2458,14 +2252,18 @@
$as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
-{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+{ { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "cannot compute suffix of object files: cannot compile
-See \`config.log' for more details" "$LINENO" 5 ; }
+{ { $as_echo "$as_me:$LINENO: error: cannot compute suffix of object files: cannot compile
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: cannot compute suffix of object files: cannot compile
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }; }
fi
+
rm -f conftest.$ac_cv_objext conftest.$ac_ext
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_objext" >&5
$as_echo "$ac_cv_objext" >&6; }
OBJEXT=$ac_cv_objext
ac_objext=$OBJEXT
@@ -2473,12 +2271,12 @@
# input file. (Note that this only needs to work for GNU compilers.)
ac_save_ext=$ac_ext
ac_ext=F
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU Fortran compiler" >&5
+{ $as_echo "$as_me:$LINENO: checking whether we are using the GNU Fortran compiler" >&5
$as_echo_n "checking whether we are using the GNU Fortran compiler... " >&6; }
-if test "${ac_cv_fc_compiler_gnu+set}" = set; then :
+if test "${ac_cv_fc_compiler_gnu+set}" = set; then
$as_echo_n "(cached) " >&6
else
- cat > conftest.$ac_ext <<_ACEOF
+ cat >conftest.$ac_ext <<_ACEOF
program main
#ifndef __GNUC__
choke me
@@ -2486,44 +2284,86 @@
end
_ACEOF
-if ac_fn_fc_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_fc_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
ac_compiler_gnu=yes
else
- ac_compiler_gnu=no
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_compiler_gnu=no
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
ac_cv_fc_compiler_gnu=$ac_compiler_gnu
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_fc_compiler_gnu" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_fc_compiler_gnu" >&5
$as_echo "$ac_cv_fc_compiler_gnu" >&6; }
ac_ext=$ac_save_ext
-ac_test_FCFLAGS=${FCFLAGS+set}
-ac_save_FCFLAGS=$FCFLAGS
+ac_test_FFLAGS=${FCFLAGS+set}
+ac_save_FFLAGS=$FCFLAGS
FCFLAGS=
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $FC accepts -g" >&5
+{ $as_echo "$as_me:$LINENO: checking whether $FC accepts -g" >&5
$as_echo_n "checking whether $FC accepts -g... " >&6; }
-if test "${ac_cv_prog_fc_g+set}" = set; then :
+if test "${ac_cv_prog_fc_g+set}" = set; then
$as_echo_n "(cached) " >&6
else
FCFLAGS=-g
-cat > conftest.$ac_ext <<_ACEOF
+cat >conftest.$ac_ext <<_ACEOF
program main
end
_ACEOF
-if ac_fn_fc_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_fc_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
ac_cv_prog_fc_g=yes
else
- ac_cv_prog_fc_g=no
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_prog_fc_g=no
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_fc_g" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_prog_fc_g" >&5
$as_echo "$ac_cv_prog_fc_g" >&6; }
-if test "$ac_test_FCFLAGS" = set; then
- FCFLAGS=$ac_save_FCFLAGS
+if test "$ac_test_FFLAGS" = set; then
+ FCFLAGS=$ac_save_FFLAGS
elif test $ac_cv_prog_fc_g = yes; then
if test "x$ac_cv_fc_compiler_gnu" = xyes; then
FCFLAGS="-g -O2"
@@ -2552,10 +2392,12 @@
flags_guess="$SHELL $srcdir/flags.guess"
-{ $as_echo "$as_me:${as_lineno-$LINENO}: running $flags_guess" >&5
+{ $as_echo "$as_me:$LINENO: running $flags_guess" >&5
$as_echo "$as_me: running $flags_guess" >&6;}
flags=`$flags_guess` ||
- as_fn_error $? "$flags_guess failed" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: $flags_guess failed" >&5
+$as_echo "$as_me: error: $flags_guess failed" >&2;}
+ { (exit 1); exit 1; }; }
eval $flags
@@ -2563,21 +2405,38 @@
ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_fc_compiler_gnu
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to get verbose linking output from $FC" >&5
+{ $as_echo "$as_me:$LINENO: checking how to get verbose linking output from $FC" >&5
$as_echo_n "checking how to get verbose linking output from $FC... " >&6; }
-if test "${ac_cv_prog_fc_v+set}" = set; then :
+if test "${ac_cv_prog_fc_v+set}" = set; then
$as_echo_n "(cached) " >&6
else
- cat > conftest.$ac_ext <<_ACEOF
+ cat >conftest.$ac_ext <<_ACEOF
program main
end
_ACEOF
-if ac_fn_fc_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_fc_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
ac_cv_prog_fc_v=
# Try some options frequently used verbose output
for ac_verb in -v -verbose --verbose -V -\#\#\#; do
- cat > conftest.$ac_ext <<_ACEOF
+ cat >conftest.$ac_ext <<_ACEOF
program main
end
@@ -2587,17 +2446,17 @@
# 1 to this macro) to the Fortran compiler in order to get
# "verbose" output that we can then parse for the Fortran linker
# flags.
-ac_save_FCFLAGS=$FCFLAGS
+ac_save_FFLAGS=$FCFLAGS
FCFLAGS="$FCFLAGS $ac_verb"
eval "set x $ac_link"
shift
-$as_echo "$as_me:${as_lineno-$LINENO}: $*" >&5
+$as_echo "$as_me:$LINENO: $*" >&5
# gfortran 4.3 outputs lines setting COLLECT_GCC_OPTIONS, COMPILER_PATH,
# LIBRARY_PATH; skip all such settings.
ac_fc_v_output=`eval $ac_link 5>&1 2>&1 |
grep -v 'Driving:' | grep -v "^[_$as_cr_Letters][_$as_cr_alnum]*="`
$as_echo "$ac_fc_v_output" >&5
-FCFLAGS=$ac_save_FCFLAGS
+FCFLAGS=$ac_save_FFLAGS
rm -rf conftest*
@@ -2626,9 +2485,9 @@
# Doubly-quoted arguments were reported for "PGF90/x86 Linux/x86 5.0-2".
*-cmdline\ * | *-ignore\ * | *-def\ *)
ac_fc_v_output=`echo $ac_fc_v_output | sed "\
- s/-cmdline *'[^']*'/ /g; s/-cmdline *\"[^\"]*\"/ /g
- s/-ignore *'[^']*'/ /g; s/-ignore *\"[^\"]*\"/ /g
- s/-def *'[^']*'/ /g; s/-def *\"[^\"]*\"/ /g"` ;;
+ s/-cmdline *'[^']*'/ /g; s/-cmdline *\"[^\"]*\"/ /g
+ s/-ignore *'[^']*'/ /g; s/-ignore *\"[^\"]*\"/ /g
+ s/-def *'[^']*'/ /g; s/-def *\"[^\"]*\"/ /g"` ;;
# If we are using Cray Fortran then delete quotes.
*cft90*)
@@ -2639,35 +2498,39 @@
# look for -l* and *.a constructs in the output
for ac_arg in $ac_fc_v_output; do
case $ac_arg in
- [\\/]*.a | ?:[\\/]*.a | -[lLRu]*)
- ac_cv_prog_fc_v=$ac_verb
- break 2 ;;
+ [\\/]*.a | ?:[\\/]*.a | -[lLRu]*)
+ ac_cv_prog_fc_v=$ac_verb
+ break 2 ;;
esac
done
done
if test -z "$ac_cv_prog_fc_v"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cannot determine how to obtain linking information from $FC" >&5
+ { $as_echo "$as_me:$LINENO: WARNING: cannot determine how to obtain linking information from $FC" >&5
$as_echo "$as_me: WARNING: cannot determine how to obtain linking information from $FC" >&2;}
fi
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: compilation failed" >&5
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:$LINENO: WARNING: compilation failed" >&5
$as_echo "$as_me: WARNING: compilation failed" >&2;}
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_fc_v" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_prog_fc_v" >&5
$as_echo "$ac_cv_prog_fc_v" >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for Fortran libraries of $FC" >&5
+{ $as_echo "$as_me:$LINENO: checking for Fortran libraries of $FC" >&5
$as_echo_n "checking for Fortran libraries of $FC... " >&6; }
-if test "${ac_cv_fc_libs+set}" = set; then :
+if test "${ac_cv_fc_libs+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test "x$FCLIBS" != "x"; then
ac_cv_fc_libs="$FCLIBS" # Let the user override the test.
else
-cat > conftest.$ac_ext <<_ACEOF
+cat >conftest.$ac_ext <<_ACEOF
program main
end
@@ -2677,17 +2540,17 @@
# 1 to this macro) to the Fortran compiler in order to get
# "verbose" output that we can then parse for the Fortran linker
# flags.
-ac_save_FCFLAGS=$FCFLAGS
+ac_save_FFLAGS=$FCFLAGS
FCFLAGS="$FCFLAGS $ac_cv_prog_fc_v"
eval "set x $ac_link"
shift
-$as_echo "$as_me:${as_lineno-$LINENO}: $*" >&5
+$as_echo "$as_me:$LINENO: $*" >&5
# gfortran 4.3 outputs lines setting COLLECT_GCC_OPTIONS, COMPILER_PATH,
# LIBRARY_PATH; skip all such settings.
ac_fc_v_output=`eval $ac_link 5>&1 2>&1 |
grep -v 'Driving:' | grep -v "^[_$as_cr_Letters][_$as_cr_alnum]*="`
$as_echo "$ac_fc_v_output" >&5
-FCFLAGS=$ac_save_FCFLAGS
+FCFLAGS=$ac_save_FFLAGS
rm -rf conftest*
@@ -2716,9 +2579,9 @@
# Doubly-quoted arguments were reported for "PGF90/x86 Linux/x86 5.0-2".
*-cmdline\ * | *-ignore\ * | *-def\ *)
ac_fc_v_output=`echo $ac_fc_v_output | sed "\
- s/-cmdline *'[^']*'/ /g; s/-cmdline *\"[^\"]*\"/ /g
- s/-ignore *'[^']*'/ /g; s/-ignore *\"[^\"]*\"/ /g
- s/-def *'[^']*'/ /g; s/-def *\"[^\"]*\"/ /g"` ;;
+ s/-cmdline *'[^']*'/ /g; s/-cmdline *\"[^\"]*\"/ /g
+ s/-ignore *'[^']*'/ /g; s/-ignore *\"[^\"]*\"/ /g
+ s/-def *'[^']*'/ /g; s/-def *\"[^\"]*\"/ /g"` ;;
# If we are using Cray Fortran then delete quotes.
*cft90*)
@@ -2737,8 +2600,8 @@
shift
ac_arg=$1
case $ac_arg in
- [\\/]*.a | ?:[\\/]*.a)
- ac_exists=false
+ [\\/]*.a | ?:[\\/]*.a)
+ ac_exists=false
for ac_i in $ac_cv_fc_libs; do
if test x"$ac_arg" = x"$ac_i"; then
ac_exists=true
@@ -2746,14 +2609,15 @@
fi
done
- if test x"$ac_exists" = xtrue; then :
-
+ if test x"$ac_exists" = xtrue; then
+ :
else
ac_cv_fc_libs="$ac_cv_fc_libs $ac_arg"
fi
- ;;
- -bI:*)
- ac_exists=false
+
+ ;;
+ -bI:*)
+ ac_exists=false
for ac_i in $ac_cv_fc_libs; do
if test x"$ac_arg" = x"$ac_i"; then
ac_exists=true
@@ -2761,8 +2625,8 @@
fi
done
- if test x"$ac_exists" = xtrue; then :
-
+ if test x"$ac_exists" = xtrue; then
+ :
else
if test "$ac_compiler_gnu" = yes; then
for ac_link_opt in $ac_arg; do
@@ -2772,18 +2636,18 @@
ac_cv_fc_libs="$ac_cv_fc_libs $ac_arg"
fi
fi
- ;;
- # Ignore these flags.
- -lang* | -lcrt*.o | -lc | -lgcc* | -lSystem | -libmil | -little \
- |-LANG:=* | -LIST:* | -LNO:* | -link)
- ;;
- -lkernel32)
- test x"$CYGWIN" != xyes && ac_cv_fc_libs="$ac_cv_fc_libs $ac_arg"
- ;;
- -[LRuYz])
- # These flags, when seen by themselves, take an argument.
- # We remove the space between option and argument and re-iterate
- # unless we find an empty arg or a new option (starting with -)
+
+ ;;
+ # Ignore these flags.
+ -lang* | -lcrt*.o | -lc | -lgcc* | -lSystem | -libmil | -LANG:=* | -LIST:* | -LNO:*)
+ ;;
+ -lkernel32)
+ test x"$CYGWIN" != xyes && ac_cv_fc_libs="$ac_cv_fc_libs $ac_arg"
+ ;;
+ -[LRuYz])
+ # These flags, when seen by themselves, take an argument.
+ # We remove the space between option and argument and re-iterate
+ # unless we find an empty arg or a new option (starting with -)
case $2 in
"" | -*);;
*)
@@ -2792,10 +2656,10 @@
set X $ac_arg "$@"
;;
esac
- ;;
- -YP,*)
- for ac_j in `$as_echo "$ac_arg" | sed -e 's/-YP,/-L/;s/:/ -L/g'`; do
- ac_exists=false
+ ;;
+ -YP,*)
+ for ac_j in `$as_echo "$ac_arg" | sed -e 's/-YP,/-L/;s/:/ -L/g'`; do
+ ac_exists=false
for ac_i in $ac_cv_fc_libs; do
if test x"$ac_j" = x"$ac_i"; then
ac_exists=true
@@ -2803,16 +2667,17 @@
fi
done
- if test x"$ac_exists" = xtrue; then :
-
+ if test x"$ac_exists" = xtrue; then
+ :
else
ac_arg="$ac_arg $ac_j"
- ac_cv_fc_libs="$ac_cv_fc_libs $ac_j"
+ ac_cv_fc_libs="$ac_cv_fc_libs $ac_j"
fi
- done
- ;;
- -[lLR]*)
- ac_exists=false
+
+ done
+ ;;
+ -[lLR]*)
+ ac_exists=false
for ac_i in $ac_cv_fc_libs; do
if test x"$ac_arg" = x"$ac_i"; then
ac_exists=true
@@ -2820,16 +2685,17 @@
fi
done
- if test x"$ac_exists" = xtrue; then :
-
+ if test x"$ac_exists" = xtrue; then
+ :
else
ac_cv_fc_libs="$ac_cv_fc_libs $ac_arg"
fi
- ;;
+
+ ;;
-zallextract*| -zdefaultextract)
ac_cv_fc_libs="$ac_cv_fc_libs $ac_arg"
;;
- # Ignore everything else.
+ # Ignore everything else.
esac
done
# restore positional arguments
@@ -2841,9 +2707,9 @@
case `(uname -sr) 2>/dev/null` in
"SunOS 5"*)
ac_ld_run_path=`$as_echo "$ac_fc_v_output" |
- sed -n 's,^.*LD_RUN_PATH *= *\(/[^ ]*\).*$,-R\1,p'`
+ sed -n 's,^.*LD_RUN_PATH *= *\(/[^ ]*\).*$,-R\1,p'`
test "x$ac_ld_run_path" != x &&
- if test "$ac_compiler_gnu" = yes; then
+ if test "$ac_compiler_gnu" = yes; then
for ac_link_opt in $ac_ld_run_path; do
ac_cv_fc_libs="$ac_cv_fc_libs -Xlinker $ac_link_opt"
done
@@ -2855,7 +2721,7 @@
fi # test "x$[]_AC_LANG_PREFIX[]LIBS" = "x"
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_fc_libs" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_fc_libs" >&5
$as_echo "$ac_cv_fc_libs" >&6; }
FCLIBS="$ac_cv_fc_libs"
@@ -2874,9 +2740,9 @@
if test -n "$ac_tool_prefix"; then
# Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args.
set dummy ${ac_tool_prefix}gcc; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CC+set}" = set; then :
+if test "${ac_cv_prog_CC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$CC"; then
@@ -2887,24 +2753,24 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_CC="${ac_tool_prefix}gcc"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
fi
fi
CC=$ac_cv_prog_CC
if test -n "$CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+ { $as_echo "$as_me:$LINENO: result: $CC" >&5
$as_echo "$CC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -2914,9 +2780,9 @@
ac_ct_CC=$CC
# Extract the first word of "gcc", so it can be a program name with args.
set dummy gcc; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_CC+set}" = set; then :
+if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_CC"; then
@@ -2927,24 +2793,24 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_ac_ct_CC="gcc"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
fi
fi
ac_ct_CC=$ac_cv_prog_ac_ct_CC
if test -n "$ac_ct_CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+ { $as_echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
$as_echo "$ac_ct_CC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -2953,7 +2819,7 @@
else
case $cross_compiling:$ac_tool_warned in
yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+{ $as_echo "$as_me:$LINENO: WARNING: using cross tools not prefixed with host triplet" >&5
$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
ac_tool_warned=yes ;;
esac
@@ -2967,9 +2833,9 @@
if test -n "$ac_tool_prefix"; then
# Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args.
set dummy ${ac_tool_prefix}cc; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CC+set}" = set; then :
+if test "${ac_cv_prog_CC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$CC"; then
@@ -2980,24 +2846,24 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_CC="${ac_tool_prefix}cc"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
fi
fi
CC=$ac_cv_prog_CC
if test -n "$CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+ { $as_echo "$as_me:$LINENO: result: $CC" >&5
$as_echo "$CC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -3007,9 +2873,9 @@
if test -z "$CC"; then
# Extract the first word of "cc", so it can be a program name with args.
set dummy cc; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CC+set}" = set; then :
+if test "${ac_cv_prog_CC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$CC"; then
@@ -3021,18 +2887,18 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
ac_prog_rejected=yes
continue
fi
ac_cv_prog_CC="cc"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
if test $ac_prog_rejected = yes; then
@@ -3051,10 +2917,10 @@
fi
CC=$ac_cv_prog_CC
if test -n "$CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+ { $as_echo "$as_me:$LINENO: result: $CC" >&5
$as_echo "$CC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -3066,9 +2932,9 @@
do
# Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
set dummy $ac_tool_prefix$ac_prog; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CC+set}" = set; then :
+if test "${ac_cv_prog_CC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$CC"; then
@@ -3079,24 +2945,24 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
fi
fi
CC=$ac_cv_prog_CC
if test -n "$CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+ { $as_echo "$as_me:$LINENO: result: $CC" >&5
$as_echo "$CC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -3110,9 +2976,9 @@
do
# Extract the first word of "$ac_prog", so it can be a program name with args.
set dummy $ac_prog; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_CC+set}" = set; then :
+if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_CC"; then
@@ -3123,24 +2989,24 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_ac_ct_CC="$ac_prog"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
fi
fi
ac_ct_CC=$ac_cv_prog_ac_ct_CC
if test -n "$ac_ct_CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+ { $as_echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
$as_echo "$ac_ct_CC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -3153,7 +3019,7 @@
else
case $cross_compiling:$ac_tool_warned in
yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+{ $as_echo "$as_me:$LINENO: WARNING: using cross tools not prefixed with host triplet" >&5
$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
ac_tool_warned=yes ;;
esac
@@ -3164,42 +3030,62 @@
fi
-test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+test -z "$CC" && { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "no acceptable C compiler found in \$PATH
-See \`config.log' for more details" "$LINENO" 5 ; }
+{ { $as_echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: no acceptable C compiler found in \$PATH
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }; }
# Provide some information about the compiler.
-$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5
+$as_echo "$as_me:$LINENO: checking for C compiler version" >&5
set X $ac_compile
ac_compiler=$2
-for ac_option in --version -v -V -qversion; do
- { { ac_try="$ac_compiler $ac_option >&5"
+{ (ac_try="$ac_compiler --version >&5"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_compiler $ac_option >&5") 2>conftest.err
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compiler --version >&5") 2>&5
ac_status=$?
- if test -s conftest.err; then
- sed '10a\
-... rest of stderr output deleted ...
- 10q' conftest.err >conftest.er1
- cat conftest.er1 >&5
- fi
- rm -f conftest.er1 conftest.err
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }
-done
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+{ (ac_try="$ac_compiler -v >&5"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compiler -v >&5") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+{ (ac_try="$ac_compiler -V >&5"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compiler -V >&5") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5
+{ $as_echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5
$as_echo_n "checking whether we are using the GNU C compiler... " >&6; }
-if test "${ac_cv_c_compiler_gnu+set}" = set; then :
+if test "${ac_cv_c_compiler_gnu+set}" = set; then
$as_echo_n "(cached) " >&6
else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#ifdef FC_DUMMY_MAIN
@@ -3221,16 +3107,37 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
ac_compiler_gnu=yes
else
- ac_compiler_gnu=no
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_compiler_gnu=no
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
ac_cv_c_compiler_gnu=$ac_compiler_gnu
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5
$as_echo "$ac_cv_c_compiler_gnu" >&6; }
if test $ac_compiler_gnu = yes; then
GCC=yes
@@ -3239,16 +3146,20 @@
fi
ac_test_CFLAGS=${CFLAGS+set}
ac_save_CFLAGS=$CFLAGS
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5
+{ $as_echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5
$as_echo_n "checking whether $CC accepts -g... " >&6; }
-if test "${ac_cv_prog_cc_g+set}" = set; then :
+if test "${ac_cv_prog_cc_g+set}" = set; then
$as_echo_n "(cached) " >&6
else
ac_save_c_werror_flag=$ac_c_werror_flag
ac_c_werror_flag=yes
ac_cv_prog_cc_g=no
CFLAGS="-g"
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#ifdef FC_DUMMY_MAIN
@@ -3267,11 +3178,35 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
ac_cv_prog_cc_g=yes
else
- CFLAGS=""
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ CFLAGS=""
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#ifdef FC_DUMMY_MAIN
@@ -3290,12 +3225,36 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ :
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
-else
- ac_c_werror_flag=$ac_save_c_werror_flag
+ ac_c_werror_flag=$ac_save_c_werror_flag
CFLAGS="-g"
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#ifdef FC_DUMMY_MAIN
@@ -3314,17 +3273,42 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
ac_cv_prog_cc_g=yes
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
ac_c_werror_flag=$ac_save_c_werror_flag
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5
$as_echo "$ac_cv_prog_cc_g" >&6; }
if test "$ac_test_CFLAGS" = set; then
CFLAGS=$ac_save_CFLAGS
@@ -3341,14 +3325,18 @@
CFLAGS=
fi
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5
+{ $as_echo "$as_me:$LINENO: checking for $CC option to accept ISO C89" >&5
$as_echo_n "checking for $CC option to accept ISO C89... " >&6; }
-if test "${ac_cv_prog_cc_c89+set}" = set; then :
+if test "${ac_cv_prog_cc_c89+set}" = set; then
$as_echo_n "(cached) " >&6
else
ac_cv_prog_cc_c89=no
ac_save_CC=$CC
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#include <stdarg.h>
#include <stdio.h>
@@ -3413,9 +3401,32 @@
-Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__"
do
CC="$ac_save_CC $ac_arg"
- if ac_fn_c_try_compile "$LINENO"; then :
+ rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
ac_cv_prog_cc_c89=$ac_arg
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
fi
+
rm -f core conftest.err conftest.$ac_objext
test "x$ac_cv_prog_cc_c89" != "xno" && break
done
@@ -3426,19 +3437,17 @@
# AC_CACHE_VAL
case "x$ac_cv_prog_cc_c89" in
x)
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5
+ { $as_echo "$as_me:$LINENO: result: none needed" >&5
$as_echo "none needed" >&6; } ;;
xno)
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5
+ { $as_echo "$as_me:$LINENO: result: unsupported" >&5
$as_echo "unsupported" >&6; } ;;
*)
CC="$CC $ac_cv_prog_cc_c89"
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5
+ { $as_echo "$as_me:$LINENO: result: $ac_cv_prog_cc_c89" >&5
$as_echo "$ac_cv_prog_cc_c89" >&6; } ;;
esac
-if test "x$ac_cv_prog_cc_c89" != xno; then :
-fi
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
@@ -3452,9 +3461,9 @@
ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_fc_compiler_gnu
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for dummy main to link with Fortran libraries" >&5
+{ $as_echo "$as_me:$LINENO: checking for dummy main to link with Fortran libraries" >&5
$as_echo_n "checking for dummy main to link with Fortran libraries... " >&6; }
-if test "${ac_cv_fc_dummy_main+set}" = set; then :
+if test "${ac_cv_fc_dummy_main+set}" = set; then
$as_echo_n "(cached) " >&6
else
ac_fc_dm_save_LIBS=$LIBS
@@ -3467,7 +3476,11 @@
ac_compiler_gnu=$ac_cv_c_compiler_gnu
# First, try linking without a dummy main:
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#ifdef FC_DUMMY_MAIN
@@ -3486,17 +3499,46 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext && {
+ test "$cross_compiling" = yes ||
+ $as_test_x conftest$ac_exeext
+ }; then
ac_cv_fortran_dummy_main=none
else
- ac_cv_fortran_dummy_main=unknown
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_fortran_dummy_main=unknown
fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
+
if test $ac_cv_fortran_dummy_main = unknown; then
for ac_func in MAIN__ MAIN_ __main MAIN _MAIN __MAIN main_ main__ _main; do
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#define $ac_fortran_dm_var $ac_func
#ifdef FC_DUMMY_MAIN
@@ -3515,11 +3557,38 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext && {
+ test "$cross_compiling" = yes ||
+ $as_test_x conftest$ac_exeext
+ }; then
ac_cv_fortran_dummy_main=$ac_func; break
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
done
fi
ac_ext=${ac_fc_srcext-f}
@@ -3531,10 +3600,10 @@
LIBS=$ac_fc_dm_save_LIBS
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_fc_dummy_main" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_fc_dummy_main" >&5
$as_echo "$ac_cv_fc_dummy_main" >&6; }
FC_DUMMY_MAIN=$ac_cv_fc_dummy_main
-if test "$FC_DUMMY_MAIN" != unknown; then :
+if test "$FC_DUMMY_MAIN" != unknown; then
if test $FC_DUMMY_MAIN != none; then
cat >>confdefs.h <<_ACEOF
@@ -3543,17 +3612,23 @@
if test "x$ac_cv_fc_dummy_main" = "x$ac_cv_f77_dummy_main"; then
-$as_echo "#define FC_DUMMY_MAIN_EQ_F77 1" >>confdefs.h
+cat >>confdefs.h <<\_ACEOF
+#define FC_DUMMY_MAIN_EQ_F77 1
+_ACEOF
fi
fi
else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+ { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "linking to Fortran libraries from C fails
-See \`config.log' for more details" "$LINENO" 5 ; }
+{ { $as_echo "$as_me:$LINENO: error: linking to Fortran libraries from C fails
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: linking to Fortran libraries from C fails
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }; }
fi
+
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
@@ -3564,12 +3639,12 @@
ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_fc_compiler_gnu
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for Fortran name-mangling scheme" >&5
+{ $as_echo "$as_me:$LINENO: checking for Fortran name-mangling scheme" >&5
$as_echo_n "checking for Fortran name-mangling scheme... " >&6; }
-if test "${ac_cv_fc_mangling+set}" = set; then :
+if test "${ac_cv_fc_mangling+set}" = set; then
$as_echo_n "(cached) " >&6
else
- cat > conftest.$ac_ext <<_ACEOF
+ cat >conftest.$ac_ext <<_ACEOF
subroutine foobar()
return
end
@@ -3577,7 +3652,24 @@
return
end
_ACEOF
-if ac_fn_fc_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_fc_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
mv conftest.$ac_objext cfortran_test.$ac_objext
ac_save_LIBS=$LIBS
@@ -3592,7 +3684,11 @@
for ac_foobar in foobar FOOBAR; do
for ac_underscore in "" "_"; do
ac_func="$ac_foobar$ac_underscore"
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
@@ -3618,11 +3714,38 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext && {
+ test "$cross_compiling" = yes ||
+ $as_test_x conftest$ac_exeext
+ }; then
ac_success=yes; break 2
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
done
done
ac_ext=${ac_fc_srcext-f}
@@ -3650,7 +3773,11 @@
ac_success_extra=no
for ac_extra in "" "_"; do
ac_func="$ac_foo_bar$ac_underscore$ac_extra"
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
@@ -3676,11 +3803,38 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext && {
+ test "$cross_compiling" = yes ||
+ $as_test_x conftest$ac_exeext
+ }; then
ac_success_extra=yes; break
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
done
ac_ext=${ac_fc_srcext-f}
ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
@@ -3689,16 +3843,16 @@
if test "$ac_success_extra" = "yes"; then
ac_cv_fc_mangling="$ac_case case"
- if test -z "$ac_underscore"; then
- ac_cv_fc_mangling="$ac_cv_fc_mangling, no underscore"
+ if test -z "$ac_underscore"; then
+ ac_cv_fc_mangling="$ac_cv_fc_mangling, no underscore"
else
- ac_cv_fc_mangling="$ac_cv_fc_mangling, underscore"
- fi
- if test -z "$ac_extra"; then
- ac_cv_fc_mangling="$ac_cv_fc_mangling, no extra underscore"
+ ac_cv_fc_mangling="$ac_cv_fc_mangling, underscore"
+ fi
+ if test -z "$ac_extra"; then
+ ac_cv_fc_mangling="$ac_cv_fc_mangling, no extra underscore"
else
- ac_cv_fc_mangling="$ac_cv_fc_mangling, extra underscore"
- fi
+ ac_cv_fc_mangling="$ac_cv_fc_mangling, extra underscore"
+ fi
else
ac_cv_fc_mangling="unknown"
fi
@@ -3710,15 +3864,22 @@
rm -rf conftest*
rm -f cfortran_test*
else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "cannot compile a simple Fortran program
-See \`config.log' for more details" "$LINENO" 5 ; }
+{ { $as_echo "$as_me:$LINENO: error: cannot compile a simple Fortran program
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: cannot compile a simple Fortran program
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }; }
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_fc_mangling" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_fc_mangling" >&5
$as_echo "$ac_cv_fc_mangling" >&6; }
ac_ext=c
@@ -3731,51 +3892,85 @@
ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_fc_compiler_gnu
+
+
case $ac_cv_fc_mangling in
"lower case, no underscore, no extra underscore")
- $as_echo "#define FC_FUNC(name,NAME) name" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC(name,NAME) name
+_ACEOF
- $as_echo "#define FC_FUNC_(name,NAME) name" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC_(name,NAME) name
+_ACEOF
;;
"lower case, no underscore, extra underscore")
- $as_echo "#define FC_FUNC(name,NAME) name" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC(name,NAME) name
+_ACEOF
- $as_echo "#define FC_FUNC_(name,NAME) name ## _" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC_(name,NAME) name ## _
+_ACEOF
;;
"lower case, underscore, no extra underscore")
- $as_echo "#define FC_FUNC(name,NAME) name ## _" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC(name,NAME) name ## _
+_ACEOF
- $as_echo "#define FC_FUNC_(name,NAME) name ## _" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC_(name,NAME) name ## _
+_ACEOF
;;
"lower case, underscore, extra underscore")
- $as_echo "#define FC_FUNC(name,NAME) name ## _" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC(name,NAME) name ## _
+_ACEOF
- $as_echo "#define FC_FUNC_(name,NAME) name ## __" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC_(name,NAME) name ## __
+_ACEOF
;;
"upper case, no underscore, no extra underscore")
- $as_echo "#define FC_FUNC(name,NAME) NAME" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC(name,NAME) NAME
+_ACEOF
- $as_echo "#define FC_FUNC_(name,NAME) NAME" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC_(name,NAME) NAME
+_ACEOF
;;
"upper case, no underscore, extra underscore")
- $as_echo "#define FC_FUNC(name,NAME) NAME" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC(name,NAME) NAME
+_ACEOF
- $as_echo "#define FC_FUNC_(name,NAME) NAME ## _" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC_(name,NAME) NAME ## _
+_ACEOF
;;
"upper case, underscore, no extra underscore")
- $as_echo "#define FC_FUNC(name,NAME) NAME ## _" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC(name,NAME) NAME ## _
+_ACEOF
- $as_echo "#define FC_FUNC_(name,NAME) NAME ## _" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC_(name,NAME) NAME ## _
+_ACEOF
;;
"upper case, underscore, extra underscore")
- $as_echo "#define FC_FUNC(name,NAME) NAME ## _" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC(name,NAME) NAME ## _
+_ACEOF
- $as_echo "#define FC_FUNC_(name,NAME) NAME ## __" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC_(name,NAME) NAME ## __
+_ACEOF
;;
*)
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unknown Fortran name-mangling scheme" >&5
+ { $as_echo "$as_me:$LINENO: WARNING: unknown Fortran name-mangling scheme" >&5
$as_echo "$as_me: WARNING: unknown Fortran name-mangling scheme" >&2;}
- ;;
+ ;;
esac
ac_ext=c
@@ -3793,9 +3988,9 @@
ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_fc_compiler_gnu
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for Fortran flag to compile .f90 files" >&5
+{ $as_echo "$as_me:$LINENO: checking for Fortran flag to compile .f90 files" >&5
$as_echo_n "checking for Fortran flag to compile .f90 files... " >&6; }
-if test "${ac_cv_fc_srcext_f90+set}" = set; then :
+if test "${ac_cv_fc_srcext_f90+set}" = set; then
$as_echo_n "(cached) " >&6
else
ac_ext=f90
@@ -3804,24 +3999,49 @@
ac_cv_fc_srcext_f90=unknown
for ac_flag in none -qsuffix=f=f90 -Tf; do
test "x$ac_flag" != xnone && ac_fcflags_srcext="$ac_flag"
- cat > conftest.$ac_ext <<_ACEOF
+ cat >conftest.$ac_ext <<_ACEOF
program main
end
_ACEOF
-if ac_fn_fc_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_fc_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
ac_cv_fc_srcext_f90=$ac_flag; break
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
done
rm -f conftest.$ac_objext conftest.f90
ac_fcflags_srcext=$ac_fcflags_srcext_save
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_fc_srcext_f90" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_fc_srcext_f90" >&5
$as_echo "$ac_cv_fc_srcext_f90" >&6; }
if test "x$ac_cv_fc_srcext_f90" = xunknown; then
- as_fn_error $? "Fortran could not compile .f90 files" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: Fortran could not compile .f90 files" >&5
+$as_echo "$as_me: error: Fortran could not compile .f90 files" >&2;}
+ { (exit 1); exit 1; }; }
else
ac_fc_srcext=f90
if test "x$ac_cv_fc_srcext_f90" = xnone; then
@@ -3843,38 +4063,63 @@
ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_fc_compiler_gnu
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for Fortran flag needed to accept free-form source" >&5
-$as_echo_n "checking for Fortran flag needed to accept free-form source... " >&6; }
-if test "${ac_cv_fc_freeform+set}" = set; then :
+{ $as_echo "$as_me:$LINENO: checking for Fortran flag needed to allow free-form source" >&5
+$as_echo_n "checking for Fortran flag needed to allow free-form source... " >&6; }
+if test "${ac_cv_fc_freeform+set}" = set; then
$as_echo_n "(cached) " >&6
else
ac_cv_fc_freeform=unknown
ac_fc_freeform_FCFLAGS_save=$FCFLAGS
for ac_flag in none -ffree-form -FR -free -qfree -Mfree -Mfreeform \
- -freeform "-f free" +source=free -nfix
+ -freeform "-f free"
do
test "x$ac_flag" != xnone && FCFLAGS="$ac_fc_freeform_FCFLAGS_save $ac_flag"
- cat > conftest.$ac_ext <<_ACEOF
+ cat >conftest.$ac_ext <<_ACEOF
program freeform
! FIXME: how to best confuse non-freeform compilers?
print *, 'Hello ', &
- 'world.'
+ 'world.'
end
_ACEOF
-if ac_fn_fc_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_fc_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
ac_cv_fc_freeform=$ac_flag; break
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
done
rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
FCFLAGS=$ac_fc_freeform_FCFLAGS_save
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_fc_freeform" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_fc_freeform" >&5
$as_echo "$ac_cv_fc_freeform" >&6; }
if test "x$ac_cv_fc_freeform" = xunknown; then
- as_fn_error 77 "Fortran does not accept free-form source" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: Fortran does not accept free-form source" >&5
+$as_echo "$as_me: error: Fortran does not accept free-form source" >&2;}
+ { (exit 77); exit 77; }; }
else
if test "x$ac_cv_fc_freeform" != xnone; then
FCFLAGS="$FCFLAGS $ac_cv_fc_freeform"
@@ -3895,9 +4140,9 @@
if test -n "$ac_tool_prefix"; then
# Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args.
set dummy ${ac_tool_prefix}gcc; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CC+set}" = set; then :
+if test "${ac_cv_prog_CC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$CC"; then
@@ -3908,24 +4153,24 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_CC="${ac_tool_prefix}gcc"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
fi
fi
CC=$ac_cv_prog_CC
if test -n "$CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+ { $as_echo "$as_me:$LINENO: result: $CC" >&5
$as_echo "$CC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -3935,9 +4180,9 @@
ac_ct_CC=$CC
# Extract the first word of "gcc", so it can be a program name with args.
set dummy gcc; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_CC+set}" = set; then :
+if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_CC"; then
@@ -3948,24 +4193,24 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_ac_ct_CC="gcc"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
fi
fi
ac_ct_CC=$ac_cv_prog_ac_ct_CC
if test -n "$ac_ct_CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+ { $as_echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
$as_echo "$ac_ct_CC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -3974,7 +4219,7 @@
else
case $cross_compiling:$ac_tool_warned in
yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+{ $as_echo "$as_me:$LINENO: WARNING: using cross tools not prefixed with host triplet" >&5
$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
ac_tool_warned=yes ;;
esac
@@ -3988,9 +4233,9 @@
if test -n "$ac_tool_prefix"; then
# Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args.
set dummy ${ac_tool_prefix}cc; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CC+set}" = set; then :
+if test "${ac_cv_prog_CC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$CC"; then
@@ -4001,24 +4246,24 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_CC="${ac_tool_prefix}cc"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
fi
fi
CC=$ac_cv_prog_CC
if test -n "$CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+ { $as_echo "$as_me:$LINENO: result: $CC" >&5
$as_echo "$CC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -4028,9 +4273,9 @@
if test -z "$CC"; then
# Extract the first word of "cc", so it can be a program name with args.
set dummy cc; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CC+set}" = set; then :
+if test "${ac_cv_prog_CC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$CC"; then
@@ -4042,18 +4287,18 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
ac_prog_rejected=yes
continue
fi
ac_cv_prog_CC="cc"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
if test $ac_prog_rejected = yes; then
@@ -4072,10 +4317,10 @@
fi
CC=$ac_cv_prog_CC
if test -n "$CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+ { $as_echo "$as_me:$LINENO: result: $CC" >&5
$as_echo "$CC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -4087,9 +4332,9 @@
do
# Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
set dummy $ac_tool_prefix$ac_prog; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CC+set}" = set; then :
+if test "${ac_cv_prog_CC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$CC"; then
@@ -4100,24 +4345,24 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
fi
fi
CC=$ac_cv_prog_CC
if test -n "$CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+ { $as_echo "$as_me:$LINENO: result: $CC" >&5
$as_echo "$CC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -4131,9 +4376,9 @@
do
# Extract the first word of "$ac_prog", so it can be a program name with args.
set dummy $ac_prog; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_CC+set}" = set; then :
+if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_CC"; then
@@ -4144,24 +4389,24 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_ac_ct_CC="$ac_prog"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
fi
fi
ac_ct_CC=$ac_cv_prog_ac_ct_CC
if test -n "$ac_ct_CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+ { $as_echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
$as_echo "$ac_ct_CC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -4174,7 +4419,7 @@
else
case $cross_compiling:$ac_tool_warned in
yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+{ $as_echo "$as_me:$LINENO: WARNING: using cross tools not prefixed with host triplet" >&5
$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
ac_tool_warned=yes ;;
esac
@@ -4185,42 +4430,62 @@
fi
-test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+test -z "$CC" && { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "no acceptable C compiler found in \$PATH
-See \`config.log' for more details" "$LINENO" 5 ; }
+{ { $as_echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: no acceptable C compiler found in \$PATH
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }; }
# Provide some information about the compiler.
-$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5
+$as_echo "$as_me:$LINENO: checking for C compiler version" >&5
set X $ac_compile
ac_compiler=$2
-for ac_option in --version -v -V -qversion; do
- { { ac_try="$ac_compiler $ac_option >&5"
+{ (ac_try="$ac_compiler --version >&5"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_compiler $ac_option >&5") 2>conftest.err
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compiler --version >&5") 2>&5
ac_status=$?
- if test -s conftest.err; then
- sed '10a\
-... rest of stderr output deleted ...
- 10q' conftest.err >conftest.er1
- cat conftest.er1 >&5
- fi
- rm -f conftest.er1 conftest.err
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }
-done
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+{ (ac_try="$ac_compiler -v >&5"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compiler -v >&5") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+{ (ac_try="$ac_compiler -V >&5"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compiler -V >&5") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5
+{ $as_echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5
$as_echo_n "checking whether we are using the GNU C compiler... " >&6; }
-if test "${ac_cv_c_compiler_gnu+set}" = set; then :
+if test "${ac_cv_c_compiler_gnu+set}" = set; then
$as_echo_n "(cached) " >&6
else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#ifdef FC_DUMMY_MAIN
@@ -4242,16 +4507,37 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
ac_compiler_gnu=yes
else
- ac_compiler_gnu=no
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_compiler_gnu=no
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
ac_cv_c_compiler_gnu=$ac_compiler_gnu
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5
$as_echo "$ac_cv_c_compiler_gnu" >&6; }
if test $ac_compiler_gnu = yes; then
GCC=yes
@@ -4260,16 +4546,20 @@
fi
ac_test_CFLAGS=${CFLAGS+set}
ac_save_CFLAGS=$CFLAGS
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5
+{ $as_echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5
$as_echo_n "checking whether $CC accepts -g... " >&6; }
-if test "${ac_cv_prog_cc_g+set}" = set; then :
+if test "${ac_cv_prog_cc_g+set}" = set; then
$as_echo_n "(cached) " >&6
else
ac_save_c_werror_flag=$ac_c_werror_flag
ac_c_werror_flag=yes
ac_cv_prog_cc_g=no
CFLAGS="-g"
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#ifdef FC_DUMMY_MAIN
@@ -4288,11 +4578,35 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
ac_cv_prog_cc_g=yes
else
- CFLAGS=""
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ CFLAGS=""
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#ifdef FC_DUMMY_MAIN
@@ -4311,12 +4625,36 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ :
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
-else
- ac_c_werror_flag=$ac_save_c_werror_flag
+ ac_c_werror_flag=$ac_save_c_werror_flag
CFLAGS="-g"
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#ifdef FC_DUMMY_MAIN
@@ -4335,17 +4673,42 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
ac_cv_prog_cc_g=yes
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
ac_c_werror_flag=$ac_save_c_werror_flag
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5
$as_echo "$ac_cv_prog_cc_g" >&6; }
if test "$ac_test_CFLAGS" = set; then
CFLAGS=$ac_save_CFLAGS
@@ -4362,14 +4725,18 @@
CFLAGS=
fi
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5
+{ $as_echo "$as_me:$LINENO: checking for $CC option to accept ISO C89" >&5
$as_echo_n "checking for $CC option to accept ISO C89... " >&6; }
-if test "${ac_cv_prog_cc_c89+set}" = set; then :
+if test "${ac_cv_prog_cc_c89+set}" = set; then
$as_echo_n "(cached) " >&6
else
ac_cv_prog_cc_c89=no
ac_save_CC=$CC
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#include <stdarg.h>
#include <stdio.h>
@@ -4434,9 +4801,32 @@
-Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__"
do
CC="$ac_save_CC $ac_arg"
- if ac_fn_c_try_compile "$LINENO"; then :
+ rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
ac_cv_prog_cc_c89=$ac_arg
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
fi
+
rm -f core conftest.err conftest.$ac_objext
test "x$ac_cv_prog_cc_c89" != "xno" && break
done
@@ -4447,19 +4837,17 @@
# AC_CACHE_VAL
case "x$ac_cv_prog_cc_c89" in
x)
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5
+ { $as_echo "$as_me:$LINENO: result: none needed" >&5
$as_echo "none needed" >&6; } ;;
xno)
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5
+ { $as_echo "$as_me:$LINENO: result: unsupported" >&5
$as_echo "unsupported" >&6; } ;;
*)
CC="$CC $ac_cv_prog_cc_c89"
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5
+ { $as_echo "$as_me:$LINENO: result: $ac_cv_prog_cc_c89" >&5
$as_echo "$ac_cv_prog_cc_c89" >&6; } ;;
esac
-if test "x$ac_cv_prog_cc_c89" != xno; then :
-fi
ac_ext=${ac_fc_srcext-f}
ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
@@ -4478,14 +4866,14 @@
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5
+{ $as_echo "$as_me:$LINENO: checking how to run the C preprocessor" >&5
$as_echo_n "checking how to run the C preprocessor... " >&6; }
# On Suns, sometimes $CPP names a directory.
if test -n "$CPP" && test -d "$CPP"; then
CPP=
fi
if test -z "$CPP"; then
- if test "${ac_cv_prog_CPP+set}" = set; then :
+ if test "${ac_cv_prog_CPP+set}" = set; then
$as_echo_n "(cached) " >&6
else
# Double quotes because CPP needs to be expanded
@@ -4500,7 +4888,11 @@
# <limits.h> exists even on freestanding compilers.
# On the NeXT, cc -E runs the code through the compiler's parser,
# not just through cpp. "Syntax error" is here to catch this case.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#ifdef __STDC__
# include <limits.h>
@@ -4509,34 +4901,78 @@
#endif
Syntax error
_ACEOF
-if ac_fn_c_try_cpp "$LINENO"; then :
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null && {
+ test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ }; then
+ :
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
-else
# Broken: fails on valid input.
continue
fi
-rm -f conftest.err conftest.i conftest.$ac_ext
+rm -f conftest.err conftest.$ac_ext
+
# OK, works on sane cases. Now check whether nonexistent headers
# can be detected and how.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#include <ac_nonexistent.h>
_ACEOF
-if ac_fn_c_try_cpp "$LINENO"; then :
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null && {
+ test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ }; then
# Broken: success on invalid input.
continue
else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
# Passes both tests.
ac_preproc_ok=:
break
fi
-rm -f conftest.err conftest.i conftest.$ac_ext
+rm -f conftest.err conftest.$ac_ext
+
done
# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
-rm -f conftest.i conftest.err conftest.$ac_ext
-if $ac_preproc_ok; then :
+rm -f conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then
break
fi
@@ -4548,7 +4984,7 @@
else
ac_cv_prog_CPP=$CPP
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5
+{ $as_echo "$as_me:$LINENO: result: $CPP" >&5
$as_echo "$CPP" >&6; }
ac_preproc_ok=false
for ac_c_preproc_warn_flag in '' yes
@@ -4559,7 +4995,11 @@
# <limits.h> exists even on freestanding compilers.
# On the NeXT, cc -E runs the code through the compiler's parser,
# not just through cpp. "Syntax error" is here to catch this case.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#ifdef __STDC__
# include <limits.h>
@@ -4568,40 +5008,87 @@
#endif
Syntax error
_ACEOF
-if ac_fn_c_try_cpp "$LINENO"; then :
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null && {
+ test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ }; then
+ :
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
-else
# Broken: fails on valid input.
continue
fi
-rm -f conftest.err conftest.i conftest.$ac_ext
+rm -f conftest.err conftest.$ac_ext
+
# OK, works on sane cases. Now check whether nonexistent headers
# can be detected and how.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#include <ac_nonexistent.h>
_ACEOF
-if ac_fn_c_try_cpp "$LINENO"; then :
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null && {
+ test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ }; then
# Broken: success on invalid input.
continue
else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
# Passes both tests.
ac_preproc_ok=:
break
fi
-rm -f conftest.err conftest.i conftest.$ac_ext
+rm -f conftest.err conftest.$ac_ext
+
done
# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
-rm -f conftest.i conftest.err conftest.$ac_ext
-if $ac_preproc_ok; then :
-
+rm -f conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then
+ :
else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+ { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "C preprocessor \"$CPP\" fails sanity check
-See \`config.log' for more details" "$LINENO" 5 ; }
+{ { $as_echo "$as_me:$LINENO: error: C preprocessor \"$CPP\" fails sanity check
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: C preprocessor \"$CPP\" fails sanity check
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }; }
fi
ac_ext=c
@@ -4611,9 +5098,9 @@
ac_compiler_gnu=$ac_cv_c_compiler_gnu
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5
+{ $as_echo "$as_me:$LINENO: checking for grep that handles long lines and -e" >&5
$as_echo_n "checking for grep that handles long lines and -e... " >&6; }
-if test "${ac_cv_path_GREP+set}" = set; then :
+if test "${ac_cv_path_GREP+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -z "$GREP"; then
@@ -4624,7 +5111,7 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_prog in grep ggrep; do
+ for ac_prog in grep ggrep; do
for ac_exec_ext in '' $ac_executable_extensions; do
ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext"
{ test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue
@@ -4644,7 +5131,7 @@
$as_echo 'GREP' >> "conftest.nl"
"$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break
diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
- as_fn_arith $ac_count + 1 && ac_count=$as_val
+ ac_count=`expr $ac_count + 1`
if test $ac_count -gt ${ac_path_GREP_max-0}; then
# Best one so far, save it but keep looking for a better one
ac_cv_path_GREP="$ac_path_GREP"
@@ -4659,24 +5146,26 @@
$ac_path_GREP_found && break 3
done
done
- done
+done
IFS=$as_save_IFS
if test -z "$ac_cv_path_GREP"; then
- as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5
+$as_echo "$as_me: error: no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;}
+ { (exit 1); exit 1; }; }
fi
else
ac_cv_path_GREP=$GREP
fi
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_path_GREP" >&5
$as_echo "$ac_cv_path_GREP" >&6; }
GREP="$ac_cv_path_GREP"
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5
+{ $as_echo "$as_me:$LINENO: checking for egrep" >&5
$as_echo_n "checking for egrep... " >&6; }
-if test "${ac_cv_path_EGREP+set}" = set; then :
+if test "${ac_cv_path_EGREP+set}" = set; then
$as_echo_n "(cached) " >&6
else
if echo a | $GREP -E '(a|b)' >/dev/null 2>&1
@@ -4690,7 +5179,7 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_prog in egrep; do
+ for ac_prog in egrep; do
for ac_exec_ext in '' $ac_executable_extensions; do
ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext"
{ test -f "$ac_path_EGREP" && $as_test_x "$ac_path_EGREP"; } || continue
@@ -4710,7 +5199,7 @@
$as_echo 'EGREP' >> "conftest.nl"
"$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break
diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
- as_fn_arith $ac_count + 1 && ac_count=$as_val
+ ac_count=`expr $ac_count + 1`
if test $ac_count -gt ${ac_path_EGREP_max-0}; then
# Best one so far, save it but keep looking for a better one
ac_cv_path_EGREP="$ac_path_EGREP"
@@ -4725,10 +5214,12 @@
$ac_path_EGREP_found && break 3
done
done
- done
+done
IFS=$as_save_IFS
if test -z "$ac_cv_path_EGREP"; then
- as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5
+$as_echo "$as_me: error: no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;}
+ { (exit 1); exit 1; }; }
fi
else
ac_cv_path_EGREP=$EGREP
@@ -4736,17 +5227,21 @@
fi
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_path_EGREP" >&5
$as_echo "$ac_cv_path_EGREP" >&6; }
EGREP="$ac_cv_path_EGREP"
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5
+{ $as_echo "$as_me:$LINENO: checking for ANSI C header files" >&5
$as_echo_n "checking for ANSI C header files... " >&6; }
-if test "${ac_cv_header_stdc+set}" = set; then :
+if test "${ac_cv_header_stdc+set}" = set; then
$as_echo_n "(cached) " >&6
else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#include <stdlib.h>
#include <stdarg.h>
@@ -4769,23 +5264,48 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
ac_cv_header_stdc=yes
else
- ac_cv_header_stdc=no
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_header_stdc=no
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
if test $ac_cv_header_stdc = yes; then
# SunOS 4.x string.h does not declare mem*, contrary to ANSI.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#include <string.h>
_ACEOF
if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
- $EGREP "memchr" >/dev/null 2>&1; then :
-
+ $EGREP "memchr" >/dev/null 2>&1; then
+ :
else
ac_cv_header_stdc=no
fi
@@ -4795,14 +5315,18 @@
if test $ac_cv_header_stdc = yes; then
# ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#include <stdlib.h>
_ACEOF
if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
- $EGREP "free" >/dev/null 2>&1; then :
-
+ $EGREP "free" >/dev/null 2>&1; then
+ :
else
ac_cv_header_stdc=no
fi
@@ -4812,10 +5336,14 @@
if test $ac_cv_header_stdc = yes; then
# /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
- if test "$cross_compiling" = yes; then :
+ if test "$cross_compiling" = yes; then
:
else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#include <ctype.h>
#include <stdlib.h>
@@ -4842,33 +5370,118 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_run "$LINENO"; then :
+rm -f conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_link") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ :
+else
+ $as_echo "$as_me: program exited with status $ac_status" >&5
+$as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
-else
- ac_cv_header_stdc=no
+( exit $ac_status )
+ac_cv_header_stdc=no
fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
+rm -rf conftest.dSYM
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
fi
+
fi
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5
$as_echo "$ac_cv_header_stdc" >&6; }
if test $ac_cv_header_stdc = yes; then
-$as_echo "#define STDC_HEADERS 1" >>confdefs.h
+cat >>confdefs.h <<\_ACEOF
+#define STDC_HEADERS 1
+_ACEOF
fi
# On IRIX 5.3, sys/types and inttypes.h are conflicting.
+
+
+
+
+
+
+
+
+
for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \
inttypes.h stdint.h unistd.h
-do :
- as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
-ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default
-"
-if eval test \"x\$"$as_ac_Header"\" = x"yes"; then :
+do
+as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
+{ $as_echo "$as_me:$LINENO: checking for $ac_header" >&5
+$as_echo_n "checking for $ac_header... " >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ $as_echo_n "(cached) " >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ eval "$as_ac_Header=yes"
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_Header=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+ac_res=`eval 'as_val=${'$as_ac_Header'}
+ $as_echo "$as_val"'`
+ { $as_echo "$as_me:$LINENO: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+as_val=`eval 'as_val=${'$as_ac_Header'}
+ $as_echo "$as_val"'`
+ if test "x$as_val" = x""yes; then
cat >>confdefs.h <<_ACEOF
#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
_ACEOF
@@ -4878,22 +5491,288 @@
done
-ac_fn_c_check_header_mongrel "$LINENO" "emmintrin.h" "ac_cv_header_emmintrin_h" "$ac_includes_default"
-if test "x$ac_cv_header_emmintrin_h" = x""yes; then :
+if test "${ac_cv_header_emmintrin_h+set}" = set; then
+ { $as_echo "$as_me:$LINENO: checking for emmintrin.h" >&5
+$as_echo_n "checking for emmintrin.h... " >&6; }
+if test "${ac_cv_header_emmintrin_h+set}" = set; then
+ $as_echo_n "(cached) " >&6
+fi
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_header_emmintrin_h" >&5
+$as_echo "$ac_cv_header_emmintrin_h" >&6; }
+else
+ # Is the header compilable?
+{ $as_echo "$as_me:$LINENO: checking emmintrin.h usability" >&5
+$as_echo_n "checking emmintrin.h usability... " >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+#include <emmintrin.h>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ ac_header_compiler=yes
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
-$as_echo "#define HAVE_EMMINTRIN 1" >>confdefs.h
+ ac_header_compiler=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ $as_echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+$as_echo "$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ $as_echo "$as_me:$LINENO: checking emmintrin.h presence" >&5
+$as_echo_n "checking emmintrin.h presence... " >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <emmintrin.h>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null && {
+ test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ }; then
+ ac_header_preproc=yes
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_preproc=no
fi
+rm -f conftest.err conftest.$ac_ext
+{ $as_echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+$as_echo "$ac_header_preproc" >&6; }
-ac_fn_c_check_header_mongrel "$LINENO" "xmmintrin.h" "ac_cv_header_xmmintrin_h" "$ac_includes_default"
-if test "x$ac_cv_header_xmmintrin_h" = x""yes; then :
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+ yes:no: )
+ { $as_echo "$as_me:$LINENO: WARNING: emmintrin.h: accepted by the compiler, rejected by the preprocessor!" >&5
+$as_echo "$as_me: WARNING: emmintrin.h: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { $as_echo "$as_me:$LINENO: WARNING: emmintrin.h: proceeding with the compiler's result" >&5
+$as_echo "$as_me: WARNING: emmintrin.h: proceeding with the compiler's result" >&2;}
+ ac_header_preproc=yes
+ ;;
+ no:yes:* )
+ { $as_echo "$as_me:$LINENO: WARNING: emmintrin.h: present but cannot be compiled" >&5
+$as_echo "$as_me: WARNING: emmintrin.h: present but cannot be compiled" >&2;}
+ { $as_echo "$as_me:$LINENO: WARNING: emmintrin.h: check for missing prerequisite headers?" >&5
+$as_echo "$as_me: WARNING: emmintrin.h: check for missing prerequisite headers?" >&2;}
+ { $as_echo "$as_me:$LINENO: WARNING: emmintrin.h: see the Autoconf documentation" >&5
+$as_echo "$as_me: WARNING: emmintrin.h: see the Autoconf documentation" >&2;}
+ { $as_echo "$as_me:$LINENO: WARNING: emmintrin.h: section \"Present But Cannot Be Compiled\"" >&5
+$as_echo "$as_me: WARNING: emmintrin.h: section \"Present But Cannot Be Compiled\"" >&2;}
+ { $as_echo "$as_me:$LINENO: WARNING: emmintrin.h: proceeding with the preprocessor's result" >&5
+$as_echo "$as_me: WARNING: emmintrin.h: proceeding with the preprocessor's result" >&2;}
+ { $as_echo "$as_me:$LINENO: WARNING: emmintrin.h: in the future, the compiler will take precedence" >&5
+$as_echo "$as_me: WARNING: emmintrin.h: in the future, the compiler will take precedence" >&2;}
+ ( cat <<\_ASBOX
+## -------------------------------------- ##
+## Report this to jtromp AT princeton.edu ##
+## -------------------------------------- ##
+_ASBOX
+ ) | sed "s/^/$as_me: WARNING: /" >&2
+ ;;
+esac
+{ $as_echo "$as_me:$LINENO: checking for emmintrin.h" >&5
+$as_echo_n "checking for emmintrin.h... " >&6; }
+if test "${ac_cv_header_emmintrin_h+set}" = set; then
+ $as_echo_n "(cached) " >&6
+else
+ ac_cv_header_emmintrin_h=$ac_header_preproc
+fi
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_header_emmintrin_h" >&5
+$as_echo "$ac_cv_header_emmintrin_h" >&6; }
-$as_echo "#define HAVE_XMMINTRIN 1" >>confdefs.h
+fi
+if test "x$ac_cv_header_emmintrin_h" = x""yes; then
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_EMMINTRIN 1
+_ACEOF
+
fi
+if test "${ac_cv_header_xmmintrin_h+set}" = set; then
+ { $as_echo "$as_me:$LINENO: checking for xmmintrin.h" >&5
+$as_echo_n "checking for xmmintrin.h... " >&6; }
+if test "${ac_cv_header_xmmintrin_h+set}" = set; then
+ $as_echo_n "(cached) " >&6
+fi
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_header_xmmintrin_h" >&5
+$as_echo "$ac_cv_header_xmmintrin_h" >&6; }
+else
+ # Is the header compilable?
+{ $as_echo "$as_me:$LINENO: checking xmmintrin.h usability" >&5
+$as_echo_n "checking xmmintrin.h usability... " >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+#include <xmmintrin.h>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ ac_header_compiler=yes
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_compiler=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ $as_echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+$as_echo "$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ $as_echo "$as_me:$LINENO: checking xmmintrin.h presence" >&5
+$as_echo_n "checking xmmintrin.h presence... " >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <xmmintrin.h>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null && {
+ test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ }; then
+ ac_header_preproc=yes
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ $as_echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+$as_echo "$ac_header_preproc" >&6; }
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+ yes:no: )
+ { $as_echo "$as_me:$LINENO: WARNING: xmmintrin.h: accepted by the compiler, rejected by the preprocessor!" >&5
+$as_echo "$as_me: WARNING: xmmintrin.h: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { $as_echo "$as_me:$LINENO: WARNING: xmmintrin.h: proceeding with the compiler's result" >&5
+$as_echo "$as_me: WARNING: xmmintrin.h: proceeding with the compiler's result" >&2;}
+ ac_header_preproc=yes
+ ;;
+ no:yes:* )
+ { $as_echo "$as_me:$LINENO: WARNING: xmmintrin.h: present but cannot be compiled" >&5
+$as_echo "$as_me: WARNING: xmmintrin.h: present but cannot be compiled" >&2;}
+ { $as_echo "$as_me:$LINENO: WARNING: xmmintrin.h: check for missing prerequisite headers?" >&5
+$as_echo "$as_me: WARNING: xmmintrin.h: check for missing prerequisite headers?" >&2;}
+ { $as_echo "$as_me:$LINENO: WARNING: xmmintrin.h: see the Autoconf documentation" >&5
+$as_echo "$as_me: WARNING: xmmintrin.h: see the Autoconf documentation" >&2;}
+ { $as_echo "$as_me:$LINENO: WARNING: xmmintrin.h: section \"Present But Cannot Be Compiled\"" >&5
+$as_echo "$as_me: WARNING: xmmintrin.h: section \"Present But Cannot Be Compiled\"" >&2;}
+ { $as_echo "$as_me:$LINENO: WARNING: xmmintrin.h: proceeding with the preprocessor's result" >&5
+$as_echo "$as_me: WARNING: xmmintrin.h: proceeding with the preprocessor's result" >&2;}
+ { $as_echo "$as_me:$LINENO: WARNING: xmmintrin.h: in the future, the compiler will take precedence" >&5
+$as_echo "$as_me: WARNING: xmmintrin.h: in the future, the compiler will take precedence" >&2;}
+ ( cat <<\_ASBOX
+## -------------------------------------- ##
+## Report this to jtromp AT princeton.edu ##
+## -------------------------------------- ##
+_ASBOX
+ ) | sed "s/^/$as_me: WARNING: /" >&2
+ ;;
+esac
+{ $as_echo "$as_me:$LINENO: checking for xmmintrin.h" >&5
+$as_echo_n "checking for xmmintrin.h... " >&6; }
+if test "${ac_cv_header_xmmintrin_h+set}" = set; then
+ $as_echo_n "(cached) " >&6
+else
+ ac_cv_header_xmmintrin_h=$ac_header_preproc
+fi
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_header_xmmintrin_h" >&5
+$as_echo "$ac_cv_header_xmmintrin_h" >&6; }
+
+fi
+if test "x$ac_cv_header_xmmintrin_h" = x""yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_XMMINTRIN 1
+_ACEOF
+
+fi
+
+
ac_ext=${ac_fc_srcext-f}
ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
@@ -4906,6 +5785,13 @@
+
+
+
+
+
+
+
if test x"$MPIFC" = x; then
MPIFC=mpif90
fi
@@ -4940,9 +5826,9 @@
FC=$MPIFC
FCFLAGS="$FCFLAGS $FLAGS_NO_CHECK"
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether mpif.h works" >&5
+{ $as_echo "$as_me:$LINENO: checking whether mpif.h works" >&5
$as_echo_n "checking whether mpif.h works... " >&6; }
-cat > conftest.$ac_ext <<_ACEOF
+cat >conftest.$ac_ext <<_ACEOF
program main
@@ -4956,14 +5842,34 @@
end
_ACEOF
-if ac_fn_fc_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_fc_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+ { $as_echo "$as_me:$LINENO: result: yes" >&5
$as_echo "yes" >&6; }
else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
cit_mpif_h=unknown
cit_mpifc_info=`$FC -compile_info 2>/dev/null`
@@ -4973,13 +5879,16 @@
esac
done
if test "$cit_mpif_h" == "unknown"; then
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+ { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "cannot compile a trivial MPI program using $MPIFC
-See \`config.log' for more details" "$LINENO" 5 ; }
+{ { $as_echo "$as_me:$LINENO: error: cannot compile a trivial MPI program using $MPIFC
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: cannot compile a trivial MPI program using $MPIFC
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }; }
fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ofile" >&5
+ { $as_echo "$as_me:$LINENO: creating $ofile" >&5
$as_echo "$as_me: creating $ofile" >&6;}
cat >"$cfgfile" <<END_OF_HEADER
! $ofile. Generated from $cit_mpif_h by configure.
@@ -4989,9 +5898,9 @@
mv -f "$cfgfile" "$ofile" || \
(rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile")
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether generated mpif.h works" >&5
+ { $as_echo "$as_me:$LINENO: checking whether generated mpif.h works" >&5
$as_echo_n "checking whether generated mpif.h works... " >&6; }
- cat > conftest.$ac_ext <<_ACEOF
+ cat >conftest.$ac_ext <<_ACEOF
program main
@@ -5005,25 +5914,50 @@
end
_ACEOF
-if ac_fn_fc_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_fc_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+ { $as_echo "$as_me:$LINENO: result: yes" >&5
$as_echo "yes" >&6; }
else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+ { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "cannot compile a trivial MPI program using $MPIFC
-See \`config.log' for more details" "$LINENO" 5 ; }
+{ { $as_echo "$as_me:$LINENO: error: cannot compile a trivial MPI program using $MPIFC
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: cannot compile a trivial MPI program using $MPIFC
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }; }
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
FC=$cit_fc_save_fc
@@ -5072,13 +6006,13 @@
case $ac_val in #(
*${as_nl}*)
case $ac_var in #(
- *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5
+ *_cv_*) { $as_echo "$as_me:$LINENO: WARNING: cache variable $ac_var contains a newline" >&5
$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;;
esac
case $ac_var in #(
_ | IFS | as_nl) ;; #(
BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #(
- *) { eval $ac_var=; unset $ac_var;} ;;
+ *) $as_unset $ac_var ;;
esac ;;
esac
done
@@ -5086,8 +6020,8 @@
(set) 2>&1 |
case $as_nl`(ac_space=' '; set) 2>&1` in #(
*${as_nl}ac_space=\ *)
- # `set' does not quote correctly, so add quotes: double-quote
- # substitution turns \\\\ into \\, and sed turns \\ into \.
+ # `set' does not quote correctly, so add quotes (double-quote
+ # substitution turns \\\\ into \\, and sed turns \\ into \).
sed -n \
"s/'/'\\\\''/g;
s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p"
@@ -5110,11 +6044,11 @@
if diff "$cache_file" confcache >/dev/null 2>&1; then :; else
if test -w "$cache_file"; then
test "x$cache_file" != "x/dev/null" &&
- { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5
+ { $as_echo "$as_me:$LINENO: updating cache $cache_file" >&5
$as_echo "$as_me: updating cache $cache_file" >&6;}
cat confcache >$cache_file
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5
+ { $as_echo "$as_me:$LINENO: not updating unwritable cache $cache_file" >&5
$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;}
fi
fi
@@ -5128,30 +6062,35 @@
ac_libobjs=
ac_ltlibobjs=
-U=
for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue
# 1. Remove the extension, and $U if already installed.
ac_script='s/\$U\././;s/\.o$//;s/\.obj$//'
ac_i=`$as_echo "$ac_i" | sed "$ac_script"`
# 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR
# will be set to the directory where LIBOBJS objects are built.
- as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext"
- as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo'
+ ac_libobjs="$ac_libobjs \${LIBOBJDIR}$ac_i\$U.$ac_objext"
+ ac_ltlibobjs="$ac_ltlibobjs \${LIBOBJDIR}$ac_i"'$U.lo'
done
LIBOBJS=$ac_libobjs
LTLIBOBJS=$ac_ltlibobjs
+if test -z "${COND_CUDA_TRUE}" && test -z "${COND_CUDA_FALSE}"; then
+ { { $as_echo "$as_me:$LINENO: error: conditional \"COND_CUDA\" was never defined.
+Usually this means the macro was only invoked conditionally." >&5
+$as_echo "$as_me: error: conditional \"COND_CUDA\" was never defined.
+Usually this means the macro was only invoked conditionally." >&2;}
+ { (exit 1); exit 1; }; }
+fi
: ${CONFIG_STATUS=./config.status}
ac_write_fail=0
ac_clean_files_save=$ac_clean_files
ac_clean_files="$ac_clean_files $CONFIG_STATUS"
-{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5
+{ $as_echo "$as_me:$LINENO: creating $CONFIG_STATUS" >&5
$as_echo "$as_me: creating $CONFIG_STATUS" >&6;}
-as_write_fail=0
-cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1
+cat >$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
#! $SHELL
# Generated by $as_me.
# Run this file to recreate the current configuration.
@@ -5161,18 +6100,17 @@
debug=false
ac_cs_recheck=false
ac_cs_silent=false
-
SHELL=\${CONFIG_SHELL-$SHELL}
-export SHELL
-_ASEOF
-cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1
-## -------------------- ##
-## M4sh Initialization. ##
-## -------------------- ##
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+## --------------------- ##
+## M4sh Initialization. ##
+## --------------------- ##
+
# Be more Bourne compatible
DUALCASE=1; export DUALCASE # for MKS sh
-if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then :
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
emulate sh
NULLCMD=:
# Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
@@ -5180,15 +6118,23 @@
alias -g '${1+"$@"}'='"$@"'
setopt NO_GLOB_SUBST
else
- case `(set -o) 2>/dev/null` in #(
- *posix*) :
- set -o posix ;; #(
- *) :
- ;;
+ case `(set -o) 2>/dev/null` in
+ *posix*) set -o posix ;;
esac
+
fi
+
+
+# PATH needs CR
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
as_nl='
'
export as_nl
@@ -5196,13 +6142,7 @@
as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo
as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo
-# Prefer a ksh shell builtin over an external printf program on Solaris,
-# but without wasting forks for bash or zsh.
-if test -z "$BASH_VERSION$ZSH_VERSION" \
- && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then
- as_echo='print -r --'
- as_echo_n='print -rn --'
-elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then
+if (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then
as_echo='printf %s\n'
as_echo_n='printf %s'
else
@@ -5213,7 +6153,7 @@
as_echo_body='eval expr "X$1" : "X\\(.*\\)"'
as_echo_n_body='eval
arg=$1;
- case $arg in #(
+ case $arg in
*"$as_nl"*)
expr "X$arg" : "X\\(.*\\)$as_nl";
arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;;
@@ -5236,7 +6176,14 @@
}
fi
+# Support unset when possible.
+if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then
+ as_unset=unset
+else
+ as_unset=false
+fi
+
# IFS
# We need space, tab and new line, in precisely that order. Quoting is
# there to prevent editors from complaining about space-tab.
@@ -5245,15 +6192,15 @@
IFS=" "" $as_nl"
# Find who we are. Look in the path if we contain no directory separator.
-case $0 in #((
+case $0 in
*[\\/]* ) as_myself=$0 ;;
*) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
- done
+ test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+done
IFS=$as_save_IFS
;;
@@ -5265,16 +6212,12 @@
fi
if test ! -f "$as_myself"; then
$as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2
- exit 1
+ { (exit 1); exit 1; }
fi
-# Unset variables that we do not need and which cause bugs (e.g. in
-# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1"
-# suppresses any "Segmentation fault" message there. '((' could
-# trigger a bug in pdksh 5.2.14.
-for as_var in BASH_ENV ENV MAIL MAILPATH
-do eval test x\${$as_var+set} = xset \
- && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || :
+# Work around bugs in pre-3.0 UWIN ksh.
+for as_var in ENV MAIL MAILPATH
+do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var
done
PS1='$ '
PS2='> '
@@ -5286,89 +6229,7 @@
LANGUAGE=C
export LANGUAGE
-# CDPATH.
-(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
-
-
-# as_fn_error STATUS ERROR [LINENO LOG_FD]
-# ----------------------------------------
-# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
-# provided, also output the error to LOG_FD, referencing LINENO. Then exit the
-# script with STATUS, using 1 if that was 0.
-as_fn_error ()
-{
- as_status=$1; test $as_status -eq 0 && as_status=1
- if test "$4"; then
- as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4
- fi
- $as_echo "$as_me: error: $2" >&2
- as_fn_exit $as_status
-} # as_fn_error
-
-
-# as_fn_set_status STATUS
-# -----------------------
-# Set $? to STATUS, without forking.
-as_fn_set_status ()
-{
- return $1
-} # as_fn_set_status
-
-# as_fn_exit STATUS
-# -----------------
-# Exit the shell with STATUS, even in a "trap 0" or "set -e" context.
-as_fn_exit ()
-{
- set +e
- as_fn_set_status $1
- exit $1
-} # as_fn_exit
-
-# as_fn_unset VAR
-# ---------------
-# Portably unset VAR.
-as_fn_unset ()
-{
- { eval $1=; unset $1;}
-}
-as_unset=as_fn_unset
-# as_fn_append VAR VALUE
-# ----------------------
-# Append the text in VALUE to the end of the definition contained in VAR. Take
-# advantage of any shell optimizations that allow amortized linear growth over
-# repeated appends, instead of the typical quadratic growth present in naive
-# implementations.
-if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then :
- eval 'as_fn_append ()
- {
- eval $1+=\$2
- }'
-else
- as_fn_append ()
- {
- eval $1=\$$1\$2
- }
-fi # as_fn_append
-
-# as_fn_arith ARG...
-# ------------------
-# Perform arithmetic evaluation on the ARGs, and store the result in the
-# global $as_val. Take advantage of shells that can avoid forks. The arguments
-# must be portable across $(()) and expr.
-if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then :
- eval 'as_fn_arith ()
- {
- as_val=$(( $* ))
- }'
-else
- as_fn_arith ()
- {
- as_val=`expr "$@" || test $? -eq 1`
- }
-fi # as_fn_arith
-
-
+# Required to use basename.
if expr a : '\(a\)' >/dev/null 2>&1 &&
test "X`expr 00001 : '.*\(...\)'`" = X001; then
as_expr=expr
@@ -5382,12 +6243,8 @@
as_basename=false
fi
-if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
- as_dirname=dirname
-else
- as_dirname=false
-fi
+# Name of the executable.
as_me=`$as_basename -- "$0" ||
$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
X"$0" : 'X\(//\)$' \| \
@@ -5407,25 +6264,76 @@
}
s/.*/./; q'`
-# Avoid depending upon Character Ranges.
-as_cr_letters='abcdefghijklmnopqrstuvwxyz'
-as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
-as_cr_Letters=$as_cr_letters$as_cr_LETTERS
-as_cr_digits='0123456789'
-as_cr_alnum=$as_cr_Letters$as_cr_digits
+# CDPATH.
+$as_unset CDPATH
+
+
+ as_lineno_1=$LINENO
+ as_lineno_2=$LINENO
+ test "x$as_lineno_1" != "x$as_lineno_2" &&
+ test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || {
+
+ # Create $as_me.lineno as a copy of $as_myself, but with $LINENO
+ # uniformly replaced by the line number. The first 'sed' inserts a
+ # line-number line after each line using $LINENO; the second 'sed'
+ # does the real work. The second script uses 'N' to pair each
+ # line-number line with the line containing $LINENO, and appends
+ # trailing '-' during substitution so that $LINENO is not a special
+ # case at line end.
+ # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the
+ # scripts with optimization help from Paolo Bonzini. Blame Lee
+ # E. McMahon (1931-1989) for sed's syntax. :-)
+ sed -n '
+ p
+ /[$]LINENO/=
+ ' <$as_myself |
+ sed '
+ s/[$]LINENO.*/&-/
+ t lineno
+ b
+ :lineno
+ N
+ :loop
+ s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/
+ t loop
+ s/-\n.*//
+ ' >$as_me.lineno &&
+ chmod +x "$as_me.lineno" ||
+ { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2
+ { (exit 1); exit 1; }; }
+
+ # Don't try to exec as it changes $[0], causing all sort of problems
+ # (the dirname of $[0] is not the place where we might find the
+ # original and so on. Autoconf is especially sensitive to this).
+ . "./$as_me.lineno"
+ # Exit status is that of the last command.
+ exit
+}
+
+
+if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
+ as_dirname=dirname
+else
+ as_dirname=false
+fi
+
ECHO_C= ECHO_N= ECHO_T=
-case `echo -n x` in #(((((
+case `echo -n x` in
-n*)
- case `echo 'xy\c'` in
+ case `echo 'x\c'` in
*c*) ECHO_T=' ';; # ECHO_T is single tab character.
- xy) ECHO_C='\c';;
- *) echo `echo ksh88 bug on AIX 6.1` > /dev/null
- ECHO_T=' ';;
+ *) ECHO_C='\c';;
esac;;
*)
ECHO_N='-n';;
esac
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+ test "X`expr 00001 : '.*\(...\)'`" = X001; then
+ as_expr=expr
+else
+ as_expr=false
+fi
rm -f conf$$ conf$$.exe conf$$.file
if test -d conf$$.dir; then
@@ -5454,56 +6362,8 @@
rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
rmdir conf$$.dir 2>/dev/null
-
-# as_fn_mkdir_p
-# -------------
-# Create "$as_dir" as a directory, including parents if necessary.
-as_fn_mkdir_p ()
-{
-
- case $as_dir in #(
- -*) as_dir=./$as_dir;;
- esac
- test -d "$as_dir" || eval $as_mkdir_p || {
- as_dirs=
- while :; do
- case $as_dir in #(
- *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'(
- *) as_qdir=$as_dir;;
- esac
- as_dirs="'$as_qdir' $as_dirs"
- as_dir=`$as_dirname -- "$as_dir" ||
-$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$as_dir" : 'X\(//\)[^/]' \| \
- X"$as_dir" : 'X\(//\)$' \| \
- X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
-$as_echo X"$as_dir" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
- s//\1/
- q
- }
- /^X\(\/\/\)[^/].*/{
- s//\1/
- q
- }
- /^X\(\/\/\)$/{
- s//\1/
- q
- }
- /^X\(\/\).*/{
- s//\1/
- q
- }
- s/.*/./; q'`
- test -d "$as_dir" && break
- done
- test -z "$as_dirs" || eval "mkdir $as_dirs"
- } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir"
-
-
-} # as_fn_mkdir_p
if mkdir -p . 2>/dev/null; then
- as_mkdir_p='mkdir -p "$as_dir"'
+ as_mkdir_p=:
else
test -d ./-p && rmdir ./-p
as_mkdir_p=false
@@ -5522,10 +6382,10 @@
if test -d "$1"; then
test -d "$1/.";
else
- case $1 in #(
+ case $1 in
-*)set "./$1";;
esac;
- case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in #((
+ case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in
???[sx]*):;;*)false;;esac;fi
'\'' sh
'
@@ -5540,19 +6400,13 @@
exec 6>&1
-## ----------------------------------- ##
-## Main body of $CONFIG_STATUS script. ##
-## ----------------------------------- ##
-_ASEOF
-test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1
-cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
-# Save the log message, to keep $0 and so on meaningful, and to
+# Save the log message, to keep $[0] and so on meaningful, and to
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by Specfem3D Globe $as_me 5.1.2, which was
-generated by GNU Autoconf 2.67. Invocation command line was
+generated by GNU Autoconf 2.63. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
CONFIG_HEADERS = $CONFIG_HEADERS
@@ -5583,15 +6437,13 @@
cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
ac_cs_usage="\
-\`$as_me' instantiates files and other configuration actions
-from templates according to the current configuration. Unless the files
-and actions are specified as TAGs, all are instantiated by default.
+\`$as_me' instantiates files from templates according to the
+current configuration.
-Usage: $0 [OPTION]... [TAG]...
+Usage: $0 [OPTION]... [FILE]...
-h, --help print this help, then exit
-V, --version print version number and configuration settings, then exit
- --config print configuration, then exit
-q, --quiet, --silent
do not print progress messages
-d, --debug don't remove temporary files
@@ -5607,17 +6459,16 @@
Configuration headers:
$config_headers
-Report bugs to <jtromp AT princeton.edu>."
+Report bugs to <bug-autoconf at gnu.org>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
-ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
Specfem3D Globe config.status 5.1.2
-configured by $0, generated by GNU Autoconf 2.67,
- with options \\"\$ac_cs_config\\"
+configured by $0, generated by GNU Autoconf 2.63,
+ with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
-Copyright (C) 2010 Free Software Foundation, Inc.
+Copyright (C) 2008 Free Software Foundation, Inc.
This config.status script is free software; the Free Software Foundation
gives unlimited permission to copy, distribute and modify it."
@@ -5632,16 +6483,11 @@
while test $# != 0
do
case $1 in
- --*=?*)
+ --*=*)
ac_option=`expr "X$1" : 'X\([^=]*\)='`
ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'`
ac_shift=:
;;
- --*=)
- ac_option=`expr "X$1" : 'X\([^=]*\)='`
- ac_optarg=
- ac_shift=:
- ;;
*)
ac_option=$1
ac_optarg=$2
@@ -5655,29 +6501,27 @@
ac_cs_recheck=: ;;
--version | --versio | --versi | --vers | --ver | --ve | --v | -V )
$as_echo "$ac_cs_version"; exit ;;
- --config | --confi | --conf | --con | --co | --c )
- $as_echo "$ac_cs_config"; exit ;;
--debug | --debu | --deb | --de | --d | -d )
debug=: ;;
--file | --fil | --fi | --f )
$ac_shift
case $ac_optarg in
*\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;;
- '') as_fn_error $? "missing file argument" ;;
esac
- as_fn_append CONFIG_FILES " '$ac_optarg'"
+ CONFIG_FILES="$CONFIG_FILES '$ac_optarg'"
ac_need_defaults=false;;
--header | --heade | --head | --hea )
$ac_shift
case $ac_optarg in
*\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;;
esac
- as_fn_append CONFIG_HEADERS " '$ac_optarg'"
+ CONFIG_HEADERS="$CONFIG_HEADERS '$ac_optarg'"
ac_need_defaults=false;;
--he | --h)
# Conflict between --help and --header
- as_fn_error $? "ambiguous option: \`$1'
-Try \`$0 --help' for more information.";;
+ { $as_echo "$as_me: error: ambiguous option: $1
+Try \`$0 --help' for more information." >&2
+ { (exit 1); exit 1; }; };;
--help | --hel | -h )
$as_echo "$ac_cs_usage"; exit ;;
-q | -quiet | --quiet | --quie | --qui | --qu | --q \
@@ -5685,10 +6529,11 @@
ac_cs_silent=: ;;
# This is an error.
- -*) as_fn_error $? "unrecognized option: \`$1'
-Try \`$0 --help' for more information." ;;
+ -*) { $as_echo "$as_me: error: unrecognized option: $1
+Try \`$0 --help' for more information." >&2
+ { (exit 1); exit 1; }; } ;;
- *) as_fn_append ac_config_targets " $1"
+ *) ac_config_targets="$ac_config_targets $1"
ac_need_defaults=false ;;
esac
@@ -5743,7 +6588,9 @@
"setup/constants.h") CONFIG_FILES="$CONFIG_FILES setup/constants.h" ;;
"setup/precision.h") CONFIG_FILES="$CONFIG_FILES setup/precision.h" ;;
- *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5 ;;
+ *) { { $as_echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5
+$as_echo "$as_me: error: invalid argument: $ac_config_target" >&2;}
+ { (exit 1); exit 1; }; };;
esac
done
@@ -5769,7 +6616,7 @@
trap 'exit_status=$?
{ test -z "$tmp" || test ! -d "$tmp" || rm -fr "$tmp"; } && exit $exit_status
' 0
- trap 'as_fn_exit 1' 1 2 13 15
+ trap '{ (exit 1); exit 1; }' 1 2 13 15
}
# Create a (secure) tmp directory for tmp files.
@@ -5780,7 +6627,11 @@
{
tmp=./conf$$-$RANDOM
(umask 077 && mkdir "$tmp")
-} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5
+} ||
+{
+ $as_echo "$as_me: cannot create a temporary directory in ." >&2
+ { (exit 1); exit 1; }
+}
# Set up the scripts for CONFIG_FILES section.
# No need to generate them if there are no CONFIG_FILES.
@@ -5788,13 +6639,8 @@
if test -n "$CONFIG_FILES"; then
-ac_cr=`echo X | tr X '\015'`
-# On cygwin, bash can eat \r inside `` if the user requested igncr.
-# But we know of no other shell where ac_cr would be empty at this
-# point, so we can use a bashism as a fallback.
-if test "x$ac_cr" = x; then
- eval ac_cr=\$\'\\r\'
-fi
+ac_cr='
+'
ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' </dev/null 2>/dev/null`
if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then
ac_cs_awk_cr='\\r'
@@ -5811,18 +6657,24 @@
echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' &&
echo "_ACEOF"
} >conf$$subs.sh ||
- as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
-ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'`
+ { { $as_echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
+$as_echo "$as_me: error: could not make $CONFIG_STATUS" >&2;}
+ { (exit 1); exit 1; }; }
+ac_delim_num=`echo "$ac_subst_vars" | grep -c '$'`
ac_delim='%!_!# '
for ac_last_try in false false false false false :; do
. ./conf$$subs.sh ||
- as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
+$as_echo "$as_me: error: could not make $CONFIG_STATUS" >&2;}
+ { (exit 1); exit 1; }; }
ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X`
if test $ac_delim_n = $ac_delim_num; then
break
elif $ac_last_try; then
- as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
+$as_echo "$as_me: error: could not make $CONFIG_STATUS" >&2;}
+ { (exit 1); exit 1; }; }
else
ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
fi
@@ -5844,7 +6696,7 @@
t delim
:nl
h
-s/\(.\{148\}\)..*/\1/
+s/\(.\{148\}\).*/\1/
t more1
s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/
p
@@ -5858,7 +6710,7 @@
t nl
:delim
h
-s/\(.\{148\}\)..*/\1/
+s/\(.\{148\}\).*/\1/
t more2
s/["\\]/\\&/g; s/^/"/; s/$/"/
p
@@ -5911,28 +6763,22 @@
else
cat
fi < "$tmp/subs1.awk" > "$tmp/subs.awk" \
- || as_fn_error $? "could not setup config files machinery" "$LINENO" 5
+ || { { $as_echo "$as_me:$LINENO: error: could not setup config files machinery" >&5
+$as_echo "$as_me: error: could not setup config files machinery" >&2;}
+ { (exit 1); exit 1; }; }
_ACEOF
-# VPATH may cause trouble with some makes, so we remove sole $(srcdir),
-# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and
+# VPATH may cause trouble with some makes, so we remove $(srcdir),
+# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and
# trailing colons and then remove the whole line if VPATH becomes empty
# (actually we leave an empty line to preserve line numbers).
if test "x$srcdir" = x.; then
- ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{
-h
-s///
-s/^/:/
-s/[ ]*$/:/
-s/:\$(srcdir):/:/g
-s/:\${srcdir}:/:/g
-s/:@srcdir@:/:/g
-s/^:*//
+ ac_vpsub='/^[ ]*VPATH[ ]*=/{
+s/:*\$(srcdir):*/:/
+s/:*\${srcdir}:*/:/
+s/:*@srcdir@:*/:/
+s/^\([^=]*=[ ]*\):*/\1/
s/:*$//
-x
-s/\(=[ ]*\).*/\1/
-G
-s/\n//
s/^[^=]*=[ ]*$//
}'
fi
@@ -5960,7 +6806,9 @@
if test -z "$ac_t"; then
break
elif $ac_last_try; then
- as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: could not make $CONFIG_HEADERS" >&5
+$as_echo "$as_me: error: could not make $CONFIG_HEADERS" >&2;}
+ { (exit 1); exit 1; }; }
else
ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
fi
@@ -6045,7 +6893,9 @@
_ACAWK
_ACEOF
cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
- as_fn_error $? "could not setup config headers machinery" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: could not setup config headers machinery" >&5
+$as_echo "$as_me: error: could not setup config headers machinery" >&2;}
+ { (exit 1); exit 1; }; }
fi # test -n "$CONFIG_HEADERS"
@@ -6058,7 +6908,9 @@
esac
case $ac_mode$ac_tag in
:[FHL]*:*);;
- :L* | :C*:*) as_fn_error $? "invalid tag \`$ac_tag'" "$LINENO" 5 ;;
+ :L* | :C*:*) { { $as_echo "$as_me:$LINENO: error: invalid tag $ac_tag" >&5
+$as_echo "$as_me: error: invalid tag $ac_tag" >&2;}
+ { (exit 1); exit 1; }; };;
:[FH]-) ac_tag=-:-;;
:[FH]*) ac_tag=$ac_tag:$ac_tag.in;;
esac
@@ -6086,10 +6938,12 @@
[\\/$]*) false;;
*) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";;
esac ||
- as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5 ;;
+ { { $as_echo "$as_me:$LINENO: error: cannot find input file: $ac_f" >&5
+$as_echo "$as_me: error: cannot find input file: $ac_f" >&2;}
+ { (exit 1); exit 1; }; };;
esac
case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac
- as_fn_append ac_file_inputs " '$ac_f'"
+ ac_file_inputs="$ac_file_inputs '$ac_f'"
done
# Let's still pretend it is `configure' which instantiates (i.e., don't
@@ -6100,7 +6954,7 @@
`' by configure.'
if test x"$ac_file" != x-; then
configure_input="$ac_file. $configure_input"
- { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5
+ { $as_echo "$as_me:$LINENO: creating $ac_file" >&5
$as_echo "$as_me: creating $ac_file" >&6;}
fi
# Neutralize special characters interpreted by sed in replacement strings.
@@ -6113,7 +6967,9 @@
case $ac_tag in
*:-:* | *:-) cat >"$tmp/stdin" \
- || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;;
+ || { { $as_echo "$as_me:$LINENO: error: could not create $ac_file" >&5
+$as_echo "$as_me: error: could not create $ac_file" >&2;}
+ { (exit 1); exit 1; }; } ;;
esac
;;
esac
@@ -6141,7 +6997,47 @@
q
}
s/.*/./; q'`
- as_dir="$ac_dir"; as_fn_mkdir_p
+ { as_dir="$ac_dir"
+ case $as_dir in #(
+ -*) as_dir=./$as_dir;;
+ esac
+ test -d "$as_dir" || { $as_mkdir_p && mkdir -p "$as_dir"; } || {
+ as_dirs=
+ while :; do
+ case $as_dir in #(
+ *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'(
+ *) as_qdir=$as_dir;;
+ esac
+ as_dirs="'$as_qdir' $as_dirs"
+ as_dir=`$as_dirname -- "$as_dir" ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$as_dir" : 'X\(//\)[^/]' \| \
+ X"$as_dir" : 'X\(//\)$' \| \
+ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$as_dir" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)[^/].*/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\).*/{
+ s//\1/
+ q
+ }
+ s/.*/./; q'`
+ test -d "$as_dir" && break
+ done
+ test -z "$as_dirs" || eval "mkdir $as_dirs"
+ } || test -d "$as_dir" || { { $as_echo "$as_me:$LINENO: error: cannot create directory $as_dir" >&5
+$as_echo "$as_me: error: cannot create directory $as_dir" >&2;}
+ { (exit 1); exit 1; }; }; }
ac_builddir=.
case "$ac_dir" in
@@ -6189,6 +7085,7 @@
# If the template does not know about datarootdir, expand it.
# FIXME: This hack should be removed a few years after 2.60.
ac_datarootdir_hack=; ac_datarootdir_seen=
+
ac_sed_dataroot='
/datarootdir/ {
p
@@ -6198,11 +7095,12 @@
/@docdir@/p
/@infodir@/p
/@localedir@/p
-/@mandir@/p'
+/@mandir@/p
+'
case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in
*datarootdir*) ac_datarootdir_seen=yes;;
*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*)
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5
+ { $as_echo "$as_me:$LINENO: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5
$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;}
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
@@ -6212,7 +7110,7 @@
s&@infodir@&$infodir&g
s&@localedir@&$localedir&g
s&@mandir@&$mandir&g
- s&\\\${datarootdir}&$datarootdir&g' ;;
+ s&\\\${datarootdir}&$datarootdir&g' ;;
esac
_ACEOF
@@ -6239,22 +7137,26 @@
$ac_datarootdir_hack
"
eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$tmp/subs.awk" >$tmp/out \
- || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+ || { { $as_echo "$as_me:$LINENO: error: could not create $ac_file" >&5
+$as_echo "$as_me: error: could not create $ac_file" >&2;}
+ { (exit 1); exit 1; }; }
test -z "$ac_datarootdir_hack$ac_datarootdir_seen" &&
{ ac_out=`sed -n '/\${datarootdir}/p' "$tmp/out"`; test -n "$ac_out"; } &&
{ ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' "$tmp/out"`; test -z "$ac_out"; } &&
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir'
-which seems to be undefined. Please make sure it is defined" >&5
+ { $as_echo "$as_me:$LINENO: WARNING: $ac_file contains a reference to the variable \`datarootdir'
+which seems to be undefined. Please make sure it is defined." >&5
$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir'
-which seems to be undefined. Please make sure it is defined" >&2;}
+which seems to be undefined. Please make sure it is defined." >&2;}
rm -f "$tmp/stdin"
case $ac_file in
-) cat "$tmp/out" && rm -f "$tmp/out";;
*) rm -f "$ac_file" && mv "$tmp/out" "$ac_file";;
esac \
- || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+ || { { $as_echo "$as_me:$LINENO: error: could not create $ac_file" >&5
+$as_echo "$as_me: error: could not create $ac_file" >&2;}
+ { (exit 1); exit 1; }; }
;;
:H)
#
@@ -6265,19 +7167,25 @@
$as_echo "/* $configure_input */" \
&& eval '$AWK -f "$tmp/defines.awk"' "$ac_file_inputs"
} >"$tmp/config.h" \
- || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+ || { { $as_echo "$as_me:$LINENO: error: could not create $ac_file" >&5
+$as_echo "$as_me: error: could not create $ac_file" >&2;}
+ { (exit 1); exit 1; }; }
if diff "$ac_file" "$tmp/config.h" >/dev/null 2>&1; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5
+ { $as_echo "$as_me:$LINENO: $ac_file is unchanged" >&5
$as_echo "$as_me: $ac_file is unchanged" >&6;}
else
rm -f "$ac_file"
mv "$tmp/config.h" "$ac_file" \
- || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+ || { { $as_echo "$as_me:$LINENO: error: could not create $ac_file" >&5
+$as_echo "$as_me: error: could not create $ac_file" >&2;}
+ { (exit 1); exit 1; }; }
fi
else
$as_echo "/* $configure_input */" \
&& eval '$AWK -f "$tmp/defines.awk"' "$ac_file_inputs" \
- || as_fn_error $? "could not create -" "$LINENO" 5
+ || { { $as_echo "$as_me:$LINENO: error: could not create -" >&5
+$as_echo "$as_me: error: could not create -" >&2;}
+ { (exit 1); exit 1; }; }
fi
;;
@@ -6287,12 +7195,15 @@
done # for ac_tag
-as_fn_exit 0
+{ (exit 0); exit 0; }
_ACEOF
+chmod +x $CONFIG_STATUS
ac_clean_files=$ac_clean_files_save
test $ac_write_fail = 0 ||
- as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: write failure creating $CONFIG_STATUS" >&5
+$as_echo "$as_me: error: write failure creating $CONFIG_STATUS" >&2;}
+ { (exit 1); exit 1; }; }
# configure is writing to config.log, and then calls config.status.
@@ -6313,10 +7224,10 @@
exec 5>>config.log
# Use ||, not &&, to avoid exiting from the if with $? = 1, which
# would make configure fail if this is the last instruction.
- $ac_cs_success || as_fn_exit 1
+ $ac_cs_success || { (exit 1); exit 1; }
fi
if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5
+ { $as_echo "$as_me:$LINENO: WARNING: unrecognized options: $ac_unrecognized_opts" >&5
$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;}
fi
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/configure.ac
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/configure.ac 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/configure.ac 2012-02-14 15:11:07 UTC (rev 19622)
@@ -33,7 +33,14 @@
AC_SUBST([CUSTOM_REAL])
AC_SUBST([CUSTOM_MPI_TYPE])
+AC_ARG_WITH([cuda],
+ [AC_HELP_STRING([--with-cuda],
+ [build cuda GPU enabled version @<:@default=no@:>@])],
+ [want_cuda="$withval"],
+ [want_cuda=no])
+AM_CONDITIONAL([COND_CUDA], [test "$want_cuda" = yes])
+
# Checks for programs.
# a courtesy to the installed base of users
@@ -76,6 +83,13 @@
AC_ARG_VAR(MPILIBS, [extra libraries for linking MPI programs])
AC_ARG_VAR(FLAGS_CHECK, [Fortran compiler flags for non-critical subroutines])
AC_ARG_VAR(FLAGS_NO_CHECK, [Fortran compiler flags for creating fast, production-run code for critical subroutines])
+AC_ARG_VAR(CUDA_LIB,[Location of CUDA library libcudart])
+AC_ARG_VAR(CUDA_INC,[Location of CUDA include files])
+AC_ARG_VAR(MPI_INC,[Location of MPI include mpi.h, which is needed by nvcc when compiling cuda files])
+AC_ARG_VAR(AR, [ar library creation])
+AC_ARG_VAR(ARFLAGS, [ar flag library creation])
+AC_ARG_VAR(RANLIB, [ranlib library creation])
+
if test x"$MPIFC" = x; then
MPIFC=mpif90
fi
@@ -101,7 +115,7 @@
# Checks for library functions.
-AC_CONFIG_FILES([Makefile src/auxiliaries/Makefile src/meshfem3D/Makefile src/specfem3D/Makefile src/create_header_file/Makefile setup/constants.h setup/precision.h DATA/Par_file DATA/CMTSOLUTION DATA/STATIONS])
+AC_CONFIG_FILES([Makefile src/auxiliaries/Makefile src/meshfem3D/Makefile src/specfem3D/Makefile src/create_header_file/Makefile setup/constants.h setup/precision.h])
AC_OUTPUT
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/flags.guess
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/flags.guess 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/flags.guess 2012-02-14 15:11:07 UTC (rev 19622)
@@ -27,28 +27,28 @@
# check: http://software.intel.com/sites/products/documentation/hpc/compilerpro/en-us/fortran/lin/compiler_f/index.htm
#
if test x"$FLAGS_CHECK" = x; then
- FLAGS_CHECK="-O3 -xSSE4.2 -ftz -funroll-loops -unroll5 -ftz -align sequence -vec-report0 -std95 -implicitnone -warn truncated_source -warn argument_checking -warn unused -warn declarations -warn alignments -warn ignore_loc -warn usage -check nobounds" # -mcmodel=medium
+ FLAGS_CHECK="-O3 -xSSE4.2 -ftz -funroll-loops -unroll5 -ftz -align sequence -assume byterecl -vec-report0 -std95 -implicitnone -warn truncated_source -warn argument_checking -warn unused -warn declarations -warn alignments -warn ignore_loc -warn usage -check nobounds" # -mcmodel=medium
fi
if test x"$FLAGS_NO_CHECK" = x; then
- FLAGS_NO_CHECK="-O3 -xSSE4.2 -ftz -funroll-loops -unroll5 -ftz -align sequence -vec-report0 -std95 -implicitnone -warn truncated_source -warn argument_checking -warn unused -warn declarations -warn alignments -warn ignore_loc -warn usage -check nobounds" # -mcmodel=medium
+ FLAGS_NO_CHECK="-O3 -xSSE4.2 -ftz -funroll-loops -unroll5 -ftz -align sequence -assume byterecl -vec-report0 -std95 -implicitnone -warn truncated_source -warn argument_checking -warn unused -warn declarations -warn alignments -warn ignore_loc -warn usage -check nobounds" # -mcmodel=medium
fi
# useful for debugging...
# for debugging: change -check nobounds to -check all -debug -g -O0 -fp-stack-check -traceback -ftrapuv
#if test x"$FLAGS_CHECK" = x; then
# # without -e95
- # FLAGS_CHECK="-O3 -check nobounds -ftz -traceback -ftrapuv -align sequence -vec-report0 -std95 -implicitnone -warn truncated_source -warn argument_checking -warn unused -warn declarations -warn alignments -warn ignore_loc -warn usage" # -mcmodel=medium
+ # FLAGS_CHECK="-O3 -check nobounds -ftz -traceback -ftrapuv -align sequence -assume byterecl -vec-report0 -std95 -implicitnone -warn truncated_source -warn argument_checking -warn unused -warn declarations -warn alignments -warn ignore_loc -warn usage" # -mcmodel=medium
#fi
#if test x"$FLAGS_NO_CHECK" = x; then
# # standard options (leave option -ftz, which is *critical* for performance)
- # FLAGS_NO_CHECK="-O3 -xP -ftz -align sequence -vec-report0 -std95 -implicitnone -warn truncated_source -warn argument_checking -warn unused -warn declarations -warn alignments -warn ignore_loc -warn usage" # -mcmodel=medium
+ # FLAGS_NO_CHECK="-O3 -xP -ftz -align sequence -assume byterecl -vec-report0 -std95 -implicitnone -warn truncated_source -warn argument_checking -warn unused -warn declarations -warn alignments -warn ignore_loc -warn usage" # -mcmodel=medium
#fi
#
# Intel Nehalem processor architecture, Intel compiler version 10.1
#if test x"$FLAGS_CHECK" = x; then
- # FLAGS_CHECK="-O2 -ftz -xT -static-intel -implicitnone"
+ # FLAGS_CHECK="-O2 -ftz -xT -assume byterecl -static-intel -implicitnone"
#fi
#if test x"$FLAGS_NO_CHECK" = x; then
- # FLAGS_NO_CHECK="-O2 -ftz -xT -static-intel -implicitnone"
+ # FLAGS_NO_CHECK="-O2 -ftz -xT -assume byterecl -static-intel -implicitnone"
#fi
#
# Intel Nehalem processor architecture, Intel compiler version 11.1
@@ -154,9 +154,9 @@
;;
*xlf*|*/*xlf*)
#
- # on some (but not all) IBM machines one might need to add -qsave otherwise the IBM compiler allocates the
+ # do NOT remove option -qsave otherwise the IBM compiler allocates the
# arrays in the stack and the code crashes if the stack size is too
- # small (which is sometimes the case, but less often these days on large machines)
+ # small (which is often the case)
#
# on IBM with xlf one should also set
#
@@ -177,11 +177,11 @@
#
if test x"$FLAGS_NO_CHECK" = x; then
# deleted -qxflag=dvz because it requires handler function __xl_dzx and thus linking will fail
- FLAGS_NO_CHECK="-O3 -qstrict -q64 -qtune=auto -qarch=auto -qcache=auto -qfree=f90 -qsuffix=f=f90 -qhalt=w -qlanglvl=90pure -qsuppress=1518-317 -qflttrap=overflow:zerodivide:invalid:enable -qfloat=nans -qinitauto=7FBFFFFF -Q -Q+rank,swap_all -Wl,-relax -qunroll=yes"
+ FLAGS_NO_CHECK="-O3 -qsave -qstrict -q64 -qtune=auto -qarch=auto -qcache=auto -qfree=f90 -qsuffix=f=f90 -qhalt=w -qlanglvl=90pure -qsuppress=1518-317 -qflttrap=overflow:zerodivide:invalid:enable -qfloat=nans -qinitauto=7FBFFFFF -Q -Q+rank,swap_all -Wl,-relax -qunroll=yes"
# on "MareNostrum" IBM at the Barcelona SuperComputing Center (Spain) use:
- # -qtune=ppc970 -qarch=ppc64v -qsave instead of -qtune=auto -qarch=auto
+ # -qtune=ppc970 -qarch=ppc64v instead of -qtune=auto -qarch=auto
# on "Babel" IBM BlueGene at IDRIS (France) use:
- # -qtune=auto -qarch=450d -qsave instead of -qtune=auto -qarch=auto
+ # -qtune=auto -qarch=450d instead of -qtune=auto -qarch=auto
fi
if test x"$FLAGS_CHECK" = x; then
#
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/setup/constants.h.in
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/setup/constants.h.in 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/setup/constants.h.in 2012-02-14 15:11:07 UTC (rev 19622)
@@ -47,7 +47,6 @@
integer, parameter :: CUSTOM_REAL = @CUSTOM_REAL@
! this for non blocking assembly
- logical, parameter :: USE_NONBLOCKING_COMMS = .true.
integer, parameter :: ELEMENTS_NONBLOCKING_CM_IC = 1500
integer, parameter :: ELEMENTS_NONBLOCKING_OC = 3000
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/auxiliaries/create_movie_AVS_DX.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/auxiliaries/create_movie_AVS_DX.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/auxiliaries/create_movie_AVS_DX.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -758,48 +758,49 @@
!
!=====================================================================
!
-
- subroutine read_params_and_create_movie
-
!
-! This routine is called by the Pyrized version.
+! subroutine read_params_and_create_movie
!
+!!
+!! This routine is called by the Pyrized version.
+!!
+!
+! implicit none
+!
+! integer it1,it2
+! integer iformat
+!
+!! parameters read from parameter file
+! integer NEX_XI,NEX_ETA
+! integer NSTEP,NTSTEP_BETWEEN_FRAMES,NCHUNKS
+! integer NPROCTOT,NEX_PER_PROC_XI,NEX_PER_PROC_ETA
+! logical MOVIE_SURFACE
+!
+! integer, external :: err_occurred
+!
+! call read_AVS_DX_parameters(NEX_XI,NEX_ETA, &
+! NSTEP,NTSTEP_BETWEEN_FRAMES, &
+! NCHUNKS,MOVIE_SURFACE, &
+! NPROCTOT,NEX_PER_PROC_XI,NEX_PER_PROC_ETA)
+!
+!! read additional parameters for making movies
+! call read_value_integer(iformat, 'format')
+! if(err_occurred() /= 0) stop 'an error occurred while reading the parameter file'
+! call read_value_integer(it1, 'beginning')
+! if(err_occurred() /= 0) stop 'an error occurred while reading the parameter file'
+! call read_value_integer(it2, 'end')
+! if(err_occurred() /= 0) stop 'an error occurred while reading the parameter file'
+!
+!! run the main program
+! call create_movie_AVS_DX(iformat,it1,it2, &
+! NEX_XI,NEX_ETA, &
+! NSTEP,NTSTEP_BETWEEN_FRAMES, &
+! NCHUNKS, &
+! NPROCTOT,NEX_PER_PROC_XI,NEX_PER_PROC_ETA, &
+! USE_COMPONENT)
+!
+! end subroutine read_params_and_create_movie
- implicit none
-
- integer it1,it2
- integer iformat
-
-! parameters read from parameter file
- integer NEX_XI,NEX_ETA
- integer NSTEP,NTSTEP_BETWEEN_FRAMES,NCHUNKS
- integer NPROCTOT,NEX_PER_PROC_XI,NEX_PER_PROC_ETA
- logical MOVIE_SURFACE
-
- integer, external :: err_occurred
-
- call read_AVS_DX_parameters(NEX_XI,NEX_ETA, &
- NSTEP,NTSTEP_BETWEEN_FRAMES, &
- NCHUNKS,MOVIE_SURFACE, &
- NPROCTOT,NEX_PER_PROC_XI,NEX_PER_PROC_ETA)
-
-! read additional parameters for making movies
- call read_value_integer(iformat, 'format')
- if(err_occurred() /= 0) stop 'an error occurred while reading the parameter file'
- call read_value_integer(it1, 'beginning')
- if(err_occurred() /= 0) stop 'an error occurred while reading the parameter file'
- call read_value_integer(it2, 'end')
- if(err_occurred() /= 0) stop 'an error occurred while reading the parameter file'
-
-! run the main program
- call create_movie_AVS_DX(iformat,it1,it2, &
- NEX_XI,NEX_ETA, &
- NSTEP,NTSTEP_BETWEEN_FRAMES, &
- NCHUNKS, &
- NPROCTOT,NEX_PER_PROC_XI,NEX_PER_PROC_ETA)
-
- end subroutine read_params_and_create_movie
-
! ------------------------------------------------------------------
subroutine read_AVS_DX_parameters(NEX_XI,NEX_ETA, &
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/assemble_MPI_scalar_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/assemble_MPI_scalar_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/assemble_MPI_scalar_cuda.cu 2012-02-14 15:11:07 UTC (rev 19622)
@@ -0,0 +1,200 @@
+/*
+ !=====================================================================
+ !
+ ! S p e c f e m 3 D V e r s i o n 2 . 0
+ ! ---------------------------------------
+ !
+ ! Main authors: Dimitri Komatitsch and Jeroen Tromp
+ ! Princeton University, USA and University of Pau / CNRS / INRIA
+ ! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+ ! April 2011
+ !
+ ! This program is free software; you can redistribute it and/or modify
+ ! it under the terms of the GNU General Public License as published by
+ ! the Free Software Foundation; either version 2 of the License, or
+ ! (at your option) any later version.
+ !
+ ! This program is distributed in the hope that it will be useful,
+ ! but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ ! GNU General Public License for more details.
+ !
+ ! You should have received a copy of the GNU General Public License along
+ ! with this program; if not, write to the Free Software Foundation, Inc.,
+ ! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ !
+ !=====================================================================
+ */
+
+#include <stdio.h>
+#include <cuda.h>
+#include <cublas.h>
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// prepares a device array with all inter-element edge-nodes -- this
+// is followed by a memcpy and MPI operations
+// Gathers the outer-core scalar field values that live on MPI interface
+// nodes into a flat send buffer: one segment of length
+// max_nibool_interfaces_ext_mesh per interface, only the first
+// nibool entries of each segment are meaningful.
+// One thread handles one node slot; the grid is padded, so threads with
+// id >= nibool simply do nothing for that interface.
+// d_ibool_interfaces_ext_mesh holds 1-based (Fortran) global node numbers,
+// hence the "-1" when indexing the device array.
+__global__ void prepare_boundary_potential_on_device(realw* d_potential_dot_dot_acoustic,
+ realw* d_send_potential_dot_dot_buffer,
+ int num_interfaces_ext_mesh,
+ int max_nibool_interfaces_ext_mesh,
+ int* d_nibool_interfaces_ext_mesh,
+ int* d_ibool_interfaces_ext_mesh) {
+
+ int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
+ int iinterface=0;
+
+ // loops over all interfaces; each thread copies its node (if any) per interface
+ for( iinterface=0; iinterface < num_interfaces_ext_mesh; iinterface++) {
+ if(id<d_nibool_interfaces_ext_mesh[iinterface]) {
+ d_send_potential_dot_dot_buffer[(id + max_nibool_interfaces_ext_mesh*iinterface)] =
+ d_potential_dot_dot_acoustic[(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)];
+ }
+ }
+
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// prepares and transfers the inter-element edge-nodes to the host to be MPI'd
+//
+// Fills the outer-core send buffer on the GPU (via
+// prepare_boundary_potential_on_device) and copies it to the host array
+// send_potential_dot_dot_buffer for the subsequent MPI exchange.
+// FORWARD_OR_ADJOINT == 1 selects the forward field, == 3 the
+// reconstructed/backward field; any other value leaves the device buffer
+// untouched (the host still receives whatever the buffer last held).
+extern "C"
+void FC_FUNC_(transfer_boun_pot_from_device,
+ TRANSFER_BOUN_POT_FROM_DEVICE)(long* Mesh_pointer_f,
+ realw* send_potential_dot_dot_buffer,
+ int* FORWARD_OR_ADJOINT){
+
+ TRACE("transfer_boun_pot_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ // checks if anything to do
+ if( mp->num_interfaces_outer_core == 0 ) return;
+
+ // one thread per interface-node slot, padded up to a full block
+ int blocksize = BLOCKSIZE_TRANSFER;
+ int size_padded = ((int)ceil(((double)(mp->max_nibool_interfaces_outer_core))/((double)blocksize)))*blocksize;
+ int num_blocks_x = size_padded/blocksize;
+ int num_blocks_y = 1;
+ // folds the grid into 2D to respect the 65535 blocks-per-dimension limit
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ if(*FORWARD_OR_ADJOINT == 1) {
+ prepare_boundary_potential_on_device<<<grid,threads>>>(mp->d_accel_outer_core,
+ mp->d_send_accel_buffer_outer_core,
+ mp->num_interfaces_outer_core,
+ mp->max_nibool_interfaces_outer_core,
+ mp->d_nibool_interfaces_outer_core,
+ mp->d_ibool_interfaces_outer_core);
+ }
+ else if(*FORWARD_OR_ADJOINT == 3) {
+ prepare_boundary_potential_on_device<<<grid,threads>>>(mp->d_b_accel_outer_core,
+ mp->d_send_accel_buffer_outer_core,
+ mp->num_interfaces_outer_core,
+ mp->max_nibool_interfaces_outer_core,
+ mp->d_nibool_interfaces_outer_core,
+ mp->d_ibool_interfaces_outer_core);
+ }
+
+ // copies the filled buffer back to the host; implicitly synchronizes with
+ // the kernel launched above
+ print_CUDA_error_if_any(cudaMemcpy(send_potential_dot_dot_buffer,mp->d_send_accel_buffer_outer_core,
+ (mp->max_nibool_interfaces_outer_core)*(mp->num_interfaces_outer_core)*sizeof(realw),
+ cudaMemcpyDeviceToHost),98000);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ // bug fix: label previously said "prepare_boundary_kernel", which made
+ // error reports point at the wrong routine
+ exit_on_cuda_error("transfer_boun_pot_from_device");
+#endif
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+// Adds the MPI-received scalar contributions back onto the global field.
+// atomicAdd is required because the same global mesh node can appear on
+// several interfaces (and thus be touched by several threads) in the same
+// kernel launch.
+// Indexing mirrors prepare_boundary_potential_on_device: 1-based node
+// numbers in d_ibool_interfaces_ext_mesh, fixed-stride segments of length
+// max_nibool_interfaces_ext_mesh in the receive buffer.
+__global__ void assemble_boundary_potential_on_device(realw* d_potential_dot_dot_acoustic,
+ realw* d_send_potential_dot_dot_buffer,
+ int num_interfaces_ext_mesh,
+ int max_nibool_interfaces_ext_mesh,
+ int* d_nibool_interfaces_ext_mesh,
+ int* d_ibool_interfaces_ext_mesh) {
+
+ int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
+ int iinterface=0;
+
+ for( iinterface=0; iinterface < num_interfaces_ext_mesh; iinterface++) {
+ if(id<d_nibool_interfaces_ext_mesh[iinterface]) {
+
+ atomicAdd(&d_potential_dot_dot_acoustic[(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)],
+ d_send_potential_dot_dot_buffer[(id + max_nibool_interfaces_ext_mesh*iinterface)]);
+ }
+ }
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Copies the MPI-received scalar buffer onto the GPU and assembles (sums)
+// the contributions into the outer-core field on the device.
+// FORWARD_OR_ADJOINT == 1: forward field; == 3: reconstructed/backward
+// field; any other value only performs the host-to-device copy.
+extern "C"
+void FC_FUNC_(transfer_asmbl_pot_to_device,
+ TRANSFER_ASMBL_POT_TO_DEVICE)(long* Mesh_pointer,
+ realw* buffer_recv_scalar,
+ int* FORWARD_OR_ADJOINT) {
+
+ TRACE("transfer_asmbl_pot_to_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+ //double start_time = get_time();
+
+ // checks if anything to do
+ if( mp->num_interfaces_outer_core == 0 ) return;
+
+ // copies scalar buffer onto GPU
+ // consistency fix: checks the copy result like transfer_boun_pot_from_device does
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_send_accel_buffer_outer_core, buffer_recv_scalar,
+ (mp->max_nibool_interfaces_outer_core)*(mp->num_interfaces_outer_core)*sizeof(realw),
+ cudaMemcpyHostToDevice),98010);
+
+ // assembles on GPU: one thread per interface-node slot
+ int blocksize = BLOCKSIZE_TRANSFER;
+ int size_padded = ((int)ceil(((double)mp->max_nibool_interfaces_outer_core)/((double)blocksize)))*blocksize;
+ int num_blocks_x = size_padded/blocksize;
+ int num_blocks_y = 1;
+ // folds the grid into 2D to respect the 65535 blocks-per-dimension limit
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ if(*FORWARD_OR_ADJOINT == 1) {
+ //assemble forward field
+ assemble_boundary_potential_on_device<<<grid,threads>>>(mp->d_accel_outer_core,
+ mp->d_send_accel_buffer_outer_core,
+ mp->num_interfaces_outer_core,
+ mp->max_nibool_interfaces_outer_core,
+ mp->d_nibool_interfaces_outer_core,
+ mp->d_ibool_interfaces_outer_core);
+ }
+ else if(*FORWARD_OR_ADJOINT == 3) {
+ //assemble reconstructed/backward field
+ assemble_boundary_potential_on_device<<<grid,threads>>>(mp->d_b_accel_outer_core,
+ mp->d_send_accel_buffer_outer_core,
+ mp->num_interfaces_outer_core,
+ mp->max_nibool_interfaces_outer_core,
+ mp->d_nibool_interfaces_outer_core,
+ mp->d_ibool_interfaces_outer_core);
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ //double end_time = get_time();
+ //printf("Elapsed time: %e\n",end_time-start_time);
+ exit_on_cuda_error("transfer_asmbl_pot_to_device");
+#endif
+}
+
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/assemble_MPI_vector_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/assemble_MPI_vector_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/assemble_MPI_vector_cuda.cu 2012-02-14 15:11:07 UTC (rev 19622)
@@ -0,0 +1,287 @@
+/*
+ !=====================================================================
+ !
+ ! S p e c f e m 3 D V e r s i o n 2 . 0
+ ! ---------------------------------------
+ !
+ ! Main authors: Dimitri Komatitsch and Jeroen Tromp
+ ! Princeton University, USA and University of Pau / CNRS / INRIA
+ ! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+ ! April 2011
+ !
+ ! This program is free software; you can redistribute it and/or modify
+ ! it under the terms of the GNU General Public License as published by
+ ! the Free Software Foundation; either version 2 of the License, or
+ ! (at your option) any later version.
+ !
+ ! This program is distributed in the hope that it will be useful,
+ ! but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ ! GNU General Public License for more details.
+ !
+ ! You should have received a copy of the GNU General Public License along
+ ! with this program; if not, write to the Free Software Foundation, Inc.,
+ ! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ !
+ !=====================================================================
+ */
+
+#include <stdio.h>
+#include <cuda.h>
+#include <cublas.h>
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// prepares a device array with all inter-element edge-nodes -- this
+// is followed by a memcpy and MPI operations
+// Vector analogue of prepare_boundary_potential_on_device: gathers the
+// three components (x,y,z interleaved, stride 3) of each MPI interface
+// node into the flat send buffer, one fixed-stride segment per interface.
+// d_ibool_interfaces_ext_mesh holds 1-based (Fortran) node numbers,
+// hence the "-1".
+__global__ void prepare_boundary_accel_on_device(realw* d_accel, realw* d_send_accel_buffer,
+ int num_interfaces_ext_mesh,
+ int max_nibool_interfaces_ext_mesh,
+ int* d_nibool_interfaces_ext_mesh,
+ int* d_ibool_interfaces_ext_mesh) {
+
+ int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
+ int iinterface=0;
+
+ for( iinterface=0; iinterface < num_interfaces_ext_mesh; iinterface++) {
+ if(id<d_nibool_interfaces_ext_mesh[iinterface]) {
+ // copies x, y and z component of this interface node
+ d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)] =
+ d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)];
+ d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+1] =
+ d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+1];
+ d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+2] =
+ d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+2];
+ }
+ }
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// prepares and transfers the inter-element edge-nodes to the host to be MPI'd
+// (elements on boundary)
+// Fills the per-region vector send buffer on the GPU and copies it to the
+// host for MPI exchange. IREGION selects crust/mantle or inner core;
+// FORWARD_OR_ADJOINT == 1 uses the forward field, == 3 the
+// reconstructed/backward field.
+extern "C"
+void FC_FUNC_(transfer_boun_accel_from_device,
+ TRANSFER_BOUN_ACCEL_FROM_DEVICE)(long* Mesh_pointer_f,
+ realw* send_accel_buffer,
+ int* IREGION,
+ int* FORWARD_OR_ADJOINT){
+ TRACE("transfer_boun_accel_from_device");
+ int blocksize,size_padded,num_blocks_x,num_blocks_y;
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ // crust/mantle region
+ if( *IREGION == IREGION_CRUST_MANTLE ){
+ if( mp->num_interfaces_crust_mantle > 0 ){
+
+ // one thread per interface-node slot; 2D grid works around the
+ // 65535 blocks-per-dimension limit
+ blocksize = BLOCKSIZE_TRANSFER;
+ size_padded = ((int)ceil(((double)mp->max_nibool_interfaces_crust_mantle)/((double)blocksize)))*blocksize;
+ num_blocks_x = size_padded/blocksize;
+ num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ if(*FORWARD_OR_ADJOINT == 1) {
+ prepare_boundary_accel_on_device<<<grid,threads>>>(mp->d_accel_crust_mantle,
+ mp->d_send_accel_buffer_crust_mantle,
+ mp->num_interfaces_crust_mantle,
+ mp->max_nibool_interfaces_crust_mantle,
+ mp->d_nibool_interfaces_crust_mantle,
+ mp->d_ibool_interfaces_crust_mantle);
+ }
+ else if(*FORWARD_OR_ADJOINT == 3) {
+ prepare_boundary_accel_on_device<<<grid,threads>>>(mp->d_b_accel_crust_mantle,
+ mp->d_send_accel_buffer_crust_mantle,
+ mp->num_interfaces_crust_mantle,
+ mp->max_nibool_interfaces_crust_mantle,
+ mp->d_nibool_interfaces_crust_mantle,
+ mp->d_ibool_interfaces_crust_mantle);
+ }
+
+ // copies buffer to CPU
+ // NOTE(review): return value unchecked here, unlike the scalar
+ // version in assemble_MPI_scalar_cuda.cu -- consider wrapping in
+ // print_CUDA_error_if_any
+ cudaMemcpy(send_accel_buffer,mp->d_send_accel_buffer_crust_mantle,
+ 3*mp->max_nibool_interfaces_crust_mantle*mp->num_interfaces_crust_mantle*sizeof(realw),
+ cudaMemcpyDeviceToHost);
+
+ }
+ }
+
+ // inner core region
+ if( *IREGION == IREGION_INNER_CORE ){
+ if( mp->num_interfaces_inner_core > 0 ){
+
+ blocksize = BLOCKSIZE_TRANSFER;
+ size_padded = ((int)ceil(((double)mp->max_nibool_interfaces_inner_core)/((double)blocksize)))*blocksize;
+ num_blocks_x = size_padded/blocksize;
+ num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ if(*FORWARD_OR_ADJOINT == 1) {
+ prepare_boundary_accel_on_device<<<grid,threads>>>(mp->d_accel_inner_core,
+ mp->d_send_accel_buffer_inner_core,
+ mp->num_interfaces_inner_core,
+ mp->max_nibool_interfaces_inner_core,
+ mp->d_nibool_interfaces_inner_core,
+ mp->d_ibool_interfaces_inner_core);
+ }
+ else if(*FORWARD_OR_ADJOINT == 3) {
+ prepare_boundary_accel_on_device<<<grid,threads>>>(mp->d_b_accel_inner_core,
+ mp->d_send_accel_buffer_inner_core,
+ mp->num_interfaces_inner_core,
+ mp->max_nibool_interfaces_inner_core,
+ mp->d_nibool_interfaces_inner_core,
+ mp->d_ibool_interfaces_inner_core);
+ }
+
+ // copies buffer to CPU
+ cudaMemcpy(send_accel_buffer,mp->d_send_accel_buffer_inner_core,
+ 3*mp->max_nibool_interfaces_inner_core*mp->num_interfaces_inner_core*sizeof(realw),
+ cudaMemcpyDeviceToHost);
+
+ }
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("transfer_boun_accel_from_device");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Vector analogue of assemble_boundary_potential_on_device: adds the three
+// received components of each interface node onto the global array.
+// atomicAdd is required because a global mesh node may belong to several
+// interfaces and so be updated by more than one thread.
+__global__ void assemble_boundary_accel_on_device(realw* d_accel, realw* d_send_accel_buffer,
+ int num_interfaces_ext_mesh,
+ int max_nibool_interfaces_ext_mesh,
+ int* d_nibool_interfaces_ext_mesh,
+ int* d_ibool_interfaces_ext_mesh) {
+
+ int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
+ int iinterface=0;
+
+ for( iinterface=0; iinterface < num_interfaces_ext_mesh; iinterface++) {
+ if(id < d_nibool_interfaces_ext_mesh[iinterface]) {
+ // x, y and z components (stride-3 interleaved)
+ atomicAdd(&d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)],
+ d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)]);
+ atomicAdd(&d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+1],
+ d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+1]);
+ atomicAdd(&d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+2],
+ d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+2]);
+ }
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// FORWARD_OR_ADJOINT == 1 for accel, and == 3 for b_accel
+// Copies the MPI-received vector buffer onto the GPU for the selected
+// region (crust/mantle or inner core) and assembles the contributions
+// into the device acceleration array.
+extern "C"
+void FC_FUNC_(transfer_asmbl_accel_to_device,
+ TRANSFER_ASMBL_ACCEL_TO_DEVICE)(long* Mesh_pointer,
+ realw* buffer_recv_vector,
+ int* IREGION,
+ int* FORWARD_OR_ADJOINT) {
+ TRACE("transfer_asmbl_accel_to_device");
+ int blocksize,size_padded,num_blocks_x,num_blocks_y;
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+
+ // crust/mantle region
+ if( *IREGION == IREGION_CRUST_MANTLE ){
+ if( mp->num_interfaces_crust_mantle > 0 ){
+
+ // copies vector buffer values to GPU
+ // NOTE(review): return value unchecked, unlike the scalar version --
+ // consider print_CUDA_error_if_any
+ cudaMemcpy(mp->d_send_accel_buffer_crust_mantle, buffer_recv_vector,
+ 3*(mp->max_nibool_interfaces_crust_mantle)*(mp->num_interfaces_crust_mantle)*sizeof(realw),
+ cudaMemcpyHostToDevice);
+
+ // assembles values
+ // one thread per interface-node slot; 2D grid works around the
+ // 65535 blocks-per-dimension limit
+ blocksize = BLOCKSIZE_TRANSFER;
+ size_padded = ((int)ceil(((double)mp->max_nibool_interfaces_crust_mantle)/((double)blocksize)))*blocksize;
+ num_blocks_x = size_padded/blocksize;
+ num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ if(*FORWARD_OR_ADJOINT == 1) { //assemble forward accel
+ assemble_boundary_accel_on_device<<<grid,threads>>>(mp->d_accel_crust_mantle,
+ mp->d_send_accel_buffer_crust_mantle,
+ mp->num_interfaces_crust_mantle,
+ mp->max_nibool_interfaces_crust_mantle,
+ mp->d_nibool_interfaces_crust_mantle,
+ mp->d_ibool_interfaces_crust_mantle);
+ }
+ else if(*FORWARD_OR_ADJOINT == 3) { //assemble adjoint accel
+ assemble_boundary_accel_on_device<<<grid,threads>>>(mp->d_b_accel_crust_mantle,
+ mp->d_send_accel_buffer_crust_mantle,
+ mp->num_interfaces_crust_mantle,
+ mp->max_nibool_interfaces_crust_mantle,
+ mp->d_nibool_interfaces_crust_mantle,
+ mp->d_ibool_interfaces_crust_mantle);
+ }
+ }
+ }
+
+ // inner core region
+ if( *IREGION == IREGION_INNER_CORE ){
+ if( mp->num_interfaces_inner_core > 0 ){
+ // copies buffer values to GPU
+ cudaMemcpy(mp->d_send_accel_buffer_inner_core, buffer_recv_vector,
+ 3*(mp->max_nibool_interfaces_inner_core)*(mp->num_interfaces_inner_core)*sizeof(realw),
+ cudaMemcpyHostToDevice);
+
+ // assembles values
+ blocksize = BLOCKSIZE_TRANSFER;
+ size_padded = ((int)ceil(((double)mp->max_nibool_interfaces_inner_core)/((double)blocksize)))*blocksize;
+ num_blocks_x = size_padded/blocksize;
+ num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ if(*FORWARD_OR_ADJOINT == 1) { //assemble forward accel
+ assemble_boundary_accel_on_device<<<grid,threads>>>(mp->d_accel_inner_core,
+ mp->d_send_accel_buffer_inner_core,
+ mp->num_interfaces_inner_core,
+ mp->max_nibool_interfaces_inner_core,
+ mp->d_nibool_interfaces_inner_core,
+ mp->d_ibool_interfaces_inner_core);
+ }
+ else if(*FORWARD_OR_ADJOINT == 3) { //assemble adjoint accel
+ assemble_boundary_accel_on_device<<<grid,threads>>>(mp->d_b_accel_inner_core,
+ mp->d_send_accel_buffer_inner_core,
+ mp->num_interfaces_inner_core,
+ mp->max_nibool_interfaces_inner_core,
+ mp->d_nibool_interfaces_inner_core,
+ mp->d_ibool_interfaces_inner_core);
+ }
+ }
+ }
+
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("transfer_asmbl_accel_to_device");
+#endif
+}
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/check_fields_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/check_fields_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/check_fields_cuda.cu 2012-02-14 15:11:07 UTC (rev 19622)
@@ -0,0 +1,928 @@
+/*
+ !=====================================================================
+ !
+ ! S p e c f e m 3 D V e r s i o n 2 . 0
+ ! ---------------------------------------
+ !
+ ! Main authors: Dimitri Komatitsch and Jeroen Tromp
+ ! Princeton University, USA and University of Pau / CNRS / INRIA
+ ! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+ ! April 2011
+ !
+ ! This program is free software; you can redistribute it and/or modify
+ ! it under the terms of the GNU General Public License as published by
+ ! the Free Software Foundation; either version 2 of the License, or
+ ! (at your option) any later version.
+ !
+ ! This program is distributed in the hope that it will be useful,
+ ! but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ ! GNU General Public License for more details.
+ !
+ ! You should have received a copy of the GNU General Public License along
+ ! with this program; if not, write to the Free Software Foundation, Inc.,
+ ! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ !
+ !=====================================================================
+ */
+
+#include <stdio.h>
+#include <cuda.h>
+#include <cublas.h>
+
+#ifdef WITH_MPI
+#include <mpi.h>
+#endif
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+#include "prepare_constants_cuda.h"
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Helper functions
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// returns the current wall-clock time in seconds (microsecond resolution)
+double get_time()
+{
+  struct timeval now;
+  gettimeofday(&now, NULL);
+  return (double) now.tv_sec + (double) now.tv_usec * 1e-6;
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Fortran-callable entry point: unconditionally halts this process so a
+// debugger can be attached (see pause_for_debugger below).
+extern "C"
+void FC_FUNC_(pause_for_debug,
+ PAUSE_FOR_DEBUG)() {
+ TRACE("pause_for_debug");
+
+ pause_for_debugger(1);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// If pause is nonzero, prints this process' PID/host/rank (to stdout and a
+// file) and then spins in a sleep loop forever; the loop only exits when an
+// attached debugger manually sets the local variable i to a nonzero value.
+void pause_for_debugger(int pause) {
+ if(pause) {
+ int myrank;
+#ifdef WITH_MPI
+ MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
+#else
+ myrank = 0;
+#endif
+ printf("I'm rank %d\n",myrank);
+ int i = 0;
+ char hostname[256];
+ gethostname(hostname, sizeof(hostname));
+ printf("PID %d on %s:%d ready for attach\n", getpid(), hostname,myrank);
+ // NOTE(review): hard-coded, user-specific scratch path; fopen failure is
+ // tolerated (file output is best-effort only)
+ FILE *file = fopen("/scratch/eiger/rietmann/attach_gdb.txt","w+");
+ if( file != NULL ){
+ fprintf(file,"PID %d on %s:%d ready for attach\n", getpid(), hostname,myrank);
+ fclose(file);
+ }
+ fflush(stdout);
+ // intentional infinite wait: debugger must set i != 0 to continue
+ while (0 == i)
+ sleep(5);
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Synchronizes the device, then aborts the process if any CUDA error is
+// pending, printing kernel_name to identify the failing call site.
+void exit_on_cuda_error(char* kernel_name) {
+ // sync and check to catch errors from previous async operations
+ cudaThreadSynchronize();
+ cudaError_t err = cudaGetLastError();
+ if (err != cudaSuccess)
+ {
+ fprintf(stderr,"Error after: %s --- %s\n", kernel_name, cudaGetErrorString(err));
+ // pause==0: prints rank info only, does not block (see pause_for_debugger)
+ pause_for_debugger(0);
+ //free(kernel_name);
+ exit(EXIT_FAILURE);
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Prints an error message and terminates the run; under MPI the whole job
+// is aborted so no sibling rank is left hanging.
+void exit_on_error(char* info)
+{
+ printf("\nERROR: %s\n",info);
+ fflush(stdout);
+#ifdef WITH_MPI
+ MPI_Abort(MPI_COMM_WORLD,1);
+#endif
+ //free(info);
+ exit(EXIT_FAILURE);
+ // unreachable; kept for symmetry with the non-aborting code path
+ return;
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Checks the result of a CUDA runtime call; on failure prints the error
+// string together with a caller-supplied numeric tag (num) that identifies
+// the call site, then aborts the job.
+void print_CUDA_error_if_any(cudaError_t err, int num)
+{
+ if (cudaSuccess != err)
+ {
+ printf("\nCUDA error !!!!! <%s> !!!!! \nat CUDA call error code: # %d\n",cudaGetErrorString(err),num);
+ fflush(stdout);
+#ifdef WITH_MPI
+ MPI_Abort(MPI_COMM_WORLD,1);
+#endif
+ exit(EXIT_FAILURE);
+ }
+ return;
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Queries the current GPU for its free/used/total memory (all returned in
+// bytes as doubles); aborts the process if the query itself fails.
+void get_free_memory(double* free_db, double* used_db, double* total_db) {
+
+ // gets memory usage in byte
+ size_t free_byte ;
+ size_t total_byte ;
+ cudaError_t cuda_status = cudaMemGetInfo( &free_byte, &total_byte ) ;
+ if ( cudaSuccess != cuda_status ){
+ printf("Error: cudaMemGetInfo fails, %s \n", cudaGetErrorString(cuda_status) );
+ exit(EXIT_FAILURE);
+ }
+
+ *free_db = (double)free_byte ;
+ *total_db = (double)total_byte ;
+ *used_db = *total_db - *free_db ;
+ return;
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Saves GPU memory usage to file
+// Appends a line with the current GPU memory usage (in MB) to a per-rank
+// file under OUTPUT_FILES/; info_str labels the call site. File-open
+// failure is tolerated silently (logging is best-effort).
+void output_free_memory(int myrank,char* info_str) {
+
+ FILE* fp;
+ char filename[BUFSIZ];
+ double free_db,used_db,total_db;
+
+ get_free_memory(&free_db,&used_db,&total_db);
+
+ sprintf(filename,"OUTPUT_FILES/gpu_device_mem_usage_proc_%06d.txt",myrank);
+ fp = fopen(filename,"a+");
+ if( fp != NULL ){
+ fprintf(fp,"%d: @%s GPU memory usage: used = %f MB, free = %f MB, total = %f MB\n", myrank, info_str,
+ used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
+ fclose(fp);
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Fortran-callable version of above method
+// Fortran-callable version of output_free_memory: logs the current GPU
+// memory usage for this rank with an "f <rank>:" label.
+extern "C"
+void FC_FUNC_(output_free_device_memory,
+ OUTPUT_FREE_DEVICE_MEMORY)(int* myrank) {
+
+ TRACE("output_free_device_memory");
+
+ // bug fix: the previous char info[6] buffer overflowed for any rank >= 10
+ // ("f %d:" needs up to 3 + digits + NUL bytes); use a safely sized buffer
+ // and snprintf so the write can never run past it
+ char info[64];
+ snprintf(info,sizeof(info),"f %d:",*myrank);
+ output_free_memory(*myrank,info);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+// Fortran-callable query of the GPU free/used/total memory, returned in MB.
+extern "C"
+void FC_FUNC_(get_free_device_memory,
+ GET_FREE_DEVICE_MEMORY)(realw* free, realw* used, realw* total ) {
+ // consistency fix: the uppercase FC_FUNC_ argument was previously
+ // "get_FREE_DEVICE_MEMORY", which breaks linking when the Fortran
+ // compiler mangles names to all-uppercase (all other FC_FUNC_ uses in
+ // this file pass the fully uppercased symbol)
+ TRACE("get_free_device_memory");
+
+ double free_db,used_db,total_db;
+
+ get_free_memory(&free_db,&used_db,&total_db);
+
+ // converts to MB
+ *free = (realw) free_db/1024.0/1024.0;
+ *used = (realw) used_db/1024.0/1024.0;
+ *total = (realw) total_db/1024.0/1024.0;
+ return;
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+//daniel: helper function
+/*
+ __global__ void check_phase_ispec_kernel(int num_phase_ispec,
+ int* phase_ispec,
+ int NSPEC_AB,
+ int* ier) {
+
+ int i,ispec,iphase,count0,count1;
+ *ier = 0;
+
+ for(iphase=0; iphase < 2; iphase++){
+ count0 = 0;
+ count1 = 0;
+
+ for(i=0; i < num_phase_ispec; i++){
+ ispec = phase_ispec[iphase*num_phase_ispec + i] - 1;
+ if( ispec < -1 || ispec >= NSPEC_AB ){
+ printf("Error in d_phase_ispec_inner_elastic %d %d\n",i,ispec);
+ *ier = 1;
+ return;
+ }
+ if( ispec >= 0 ){ count0++;}
+ if( ispec < 0 ){ count1++;}
+ }
+
+ printf("check_phase_ispec done: phase %d, count = %d %d \n",iphase,count0,count1);
+
+ }
+ }
+
+ void check_phase_ispec(long* Mesh_pointer_f,int type){
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ printf("check phase_ispec for type=%d\n",type);
+
+ dim3 grid(1,1);
+ dim3 threads(1,1,1);
+
+ int* h_debug = (int*) calloc(1,sizeof(int));
+ int* d_debug;
+ cudaMalloc((void**)&d_debug,sizeof(int));
+
+ if( type == 1 ){
+ check_phase_ispec_kernel<<<grid,threads>>>(mp->num_phase_ispec_elastic,
+ mp->d_phase_ispec_inner_elastic,
+ mp->NSPEC_AB,
+ d_debug);
+ }else if( type == 2 ){
+ check_phase_ispec_kernel<<<grid,threads>>>(mp->num_phase_ispec_acoustic,
+ mp->d_phase_ispec_inner_acoustic,
+ mp->NSPEC_AB,
+ d_debug);
+ }
+
+ cudaMemcpy(h_debug,d_debug,1*sizeof(int),cudaMemcpyDeviceToHost);
+ cudaFree(d_debug);
+ if( *h_debug != 0 ){printf("error for type=%d\n",type); exit(1);}
+ free(h_debug);
+ fflush(stdout);
+
+ #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("check_phase_ispec");
+ #endif
+
+ }
+ */
+
+/* ----------------------------------------------------------------------------------------------- */
+//daniel: helper function
+/*
+ __global__ void check_ispec_is_kernel(int NSPEC_AB,
+ int* ispec_is,
+ int* ier) {
+
+ int ispec,count0,count1;
+
+ *ier = 0;
+ count0 = 0;
+ count1 = 0;
+ for(ispec=0; ispec < NSPEC_AB; ispec++){
+ if( ispec_is[ispec] < -1 || ispec_is[ispec] > 1 ){
+ printf("Error in ispec_is %d %d\n",ispec,ispec_is[ispec]);
+ *ier = 1;
+ return;
+ //exit(1);
+ }
+ if( ispec_is[ispec] == 0 ){count0++;}
+ if( ispec_is[ispec] != 0 ){count1++;}
+ }
+ printf("check_ispec_is done: count = %d %d\n",count0,count1);
+ }
+
+ void check_ispec_is(long* Mesh_pointer_f,int type){
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ printf("check ispec_is for type=%d\n",type);
+
+ dim3 grid(1,1);
+ dim3 threads(1,1,1);
+
+ int* h_debug = (int*) calloc(1,sizeof(int));
+ int* d_debug;
+ cudaMalloc((void**)&d_debug,sizeof(int));
+
+ if( type == 0 ){
+ check_ispec_is_kernel<<<grid,threads>>>(mp->NSPEC_AB,
+ mp->d_ispec_is_inner,
+ d_debug);
+ }else if( type == 1 ){
+ check_ispec_is_kernel<<<grid,threads>>>(mp->NSPEC_AB,
+ mp->d_ispec_is_elastic,
+ d_debug);
+ }else if( type == 2 ){
+ check_ispec_is_kernel<<<grid,threads>>>(mp->NSPEC_AB,
+ mp->d_ispec_is_acoustic,
+ d_debug);
+ }
+
+ cudaMemcpy(h_debug,d_debug,1*sizeof(int),cudaMemcpyDeviceToHost);
+ cudaFree(d_debug);
+ if( *h_debug != 0 ){printf("error for type=%d\n",type); exit(1);}
+ free(h_debug);
+ fflush(stdout);
+
+ #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("check_ispec_is");
+ #endif
+ }
+ */
+/* ----------------------------------------------------------------------------------------------- */
+//daniel: helper function
+/*
+ __global__ void check_array_ispec_kernel(int num_array_ispec,
+ int* array_ispec,
+ int NSPEC_AB,
+ int* ier) {
+
+ int i,ispec,count0,count1;
+
+ *ier = 0;
+ count0 = 0;
+ count1 = 0;
+
+ for(i=0; i < num_array_ispec; i++){
+ ispec = array_ispec[i] - 1;
+ if( ispec < -1 || ispec >= NSPEC_AB ){
+ printf("Error in d_array_ispec %d %d\n",i,ispec);
+ *ier = 1;
+ return;
+ }
+ if( ispec >= 0 ){ count0++;}
+ if( ispec < 0 ){ count1++;}
+ }
+
+ printf("check_array_ispec done: count = %d %d \n",count0,count1);
+ }
+
+ void check_array_ispec(long* Mesh_pointer_f,int type){
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ printf("check array_ispec for type=%d\n",type);
+
+ dim3 grid(1,1);
+ dim3 threads(1,1,1);
+
+ int* h_debug = (int*) calloc(1,sizeof(int));
+ int* d_debug;
+ cudaMalloc((void**)&d_debug,sizeof(int));
+
+ if( type == 1 ){
+ check_array_ispec_kernel<<<grid,threads>>>(mp->d_num_abs_boundary_faces,
+ mp->d_abs_boundary_ispec,
+ mp->NSPEC_AB,
+ d_debug);
+ }
+
+ cudaMemcpy(h_debug,d_debug,1*sizeof(int),cudaMemcpyDeviceToHost);
+ cudaFree(d_debug);
+ if( *h_debug != 0 ){printf("error for type=%d\n",type); exit(1);}
+ free(h_debug);
+ fflush(stdout);
+
+ #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("check_array_ispec");
+ #endif
+
+ }
+ */
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Check functions
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// debugging helper: copies the forward displacement wavefield from the GPU
+// into the host buffer 'displ' and prints its maximum absolute value.
+//   size        - number of realw entries to copy (buffer must be large enough)
+//   displ       - host buffer, overwritten with d_displ
+//   announceID  - integer tag echoed in the printout to identify the call site
+extern "C"
+void FC_FUNC_(check_max_norm_displ_gpu,
+ CHECK_MAX_NORM_DISPL_GPU)(int* size, realw* displ,long* Mesh_pointer_f,int* announceID) {
+
+TRACE("check_max_norm_displ_gpu");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ // blocking copy device -> host (return code not checked; debug-only routine)
+ cudaMemcpy(displ, mp->d_displ,*size*sizeof(realw),cudaMemcpyDeviceToHost);
+ realw maxnorm=0;
+
+ for(int i=0;i<*size;i++) {
+ maxnorm = MAX(maxnorm,fabsf(displ[i]));
+ }
+ printf("%d: maxnorm of forward displ = %e\n",*announceID,maxnorm);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// debugging helper: prints the maximum absolute value of a host vector
+// together with the index where it occurs.
+//   size       - number of entries in vector1
+//   vector1    - host array to scan
+//   announceID - integer tag echoed in the printout to identify the call site
+extern "C"
+void FC_FUNC_(check_max_norm_vector,
+ CHECK_MAX_NORM_VECTOR)(int* size, realw* vector1, int* announceID) {
+
+TRACE("check_max_norm_vector");
+
+ int procid;
+#ifdef WITH_MPI
+ MPI_Comm_rank(MPI_COMM_WORLD,&procid);
+#else
+ procid = 0;
+#endif
+ realw maxnorm=0;
+ int maxloc = -1; // bug fix: initialize; stays -1 for an empty vector instead of printing garbage
+ for(int i=0;i<*size;i++) {
+ if(maxnorm<fabsf(vector1[i])) {
+ // bug fix: store the absolute value; the original stored the signed entry,
+ // so a large negative value left a negative "norm" and corrupted later comparisons
+ maxnorm = fabsf(vector1[i]);
+ maxloc = i;
+ }
+ }
+ printf("%d:maxnorm of vector %d [%d] = %e\n",procid,*announceID,maxloc,maxnorm);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// debugging helper: prints the maximum absolute value of a forward
+// displacement array already resident on the host.
+extern "C"
+void FC_FUNC_(check_max_norm_displ,
+ CHECK_MAX_NORM_DISPL)(int* size, realw* displ, int* announceID) {
+
+TRACE("check_max_norm_displ");
+
+ int n = *size;
+ realw maxnorm = 0;
+
+ for(int i = 0; i < n; i++) {
+ realw absval = fabsf(displ[i]);
+ if( absval > maxnorm ) maxnorm = absval;
+ }
+ printf("%d: maxnorm of forward displ = %e\n",*announceID,maxnorm);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// debugging helper: copies the backward/reconstructed displacement and
+// acceleration wavefields from the GPU and prints their maximum absolute values.
+//   size       - number of realw entries in each wavefield array
+//   b_displ    - host buffer, overwritten with d_b_displ
+//   announceID - integer tag echoed in the printout to identify the call site
+extern "C"
+void FC_FUNC_(check_max_norm_b_displ_gpu,
+ CHECK_MAX_NORM_B_DISPL_GPU)(int* size, realw* b_displ,long* Mesh_pointer_f,int* announceID) {
+
+TRACE("check_max_norm_b_displ_gpu");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ // temporary host buffer for the backward acceleration
+ realw* b_accel = (realw*)malloc(*size*sizeof(realw));
+
+ cudaMemcpy(b_displ, mp->d_b_displ,*size*sizeof(realw),cudaMemcpyDeviceToHost);
+ cudaMemcpy(b_accel, mp->d_b_accel,*size*sizeof(realw),cudaMemcpyDeviceToHost);
+
+ realw maxnorm=0;
+ realw maxnorm_accel=0;
+
+ for(int i=0;i<*size;i++) {
+ maxnorm = MAX(maxnorm,fabsf(b_displ[i]));
+ // bug fix: accumulate into maxnorm_accel; the original compared against
+ // maxnorm and therefore reported a wrong acceleration norm
+ maxnorm_accel = MAX(maxnorm_accel,fabsf(b_accel[i]));
+ }
+ free(b_accel);
+ printf("%d: maxnorm of backward displ = %e\n",*announceID,maxnorm);
+ printf("%d: maxnorm of backward accel = %e\n",*announceID,maxnorm_accel);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// debugging helper: copies the backward acceleration wavefield from the GPU
+// into 'b_accel' and prints its maximum absolute value.
+//   size       - number of realw entries to copy
+//   announceID - integer tag echoed in the printout to identify the call site
+extern "C"
+void FC_FUNC_(check_max_norm_b_accel_gpu,
+ CHECK_MAX_NORM_B_ACCEL_GPU)(int* size, realw* b_accel,long* Mesh_pointer_f,int* announceID) {
+
+TRACE("check_max_norm_b_accel_gpu");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ cudaMemcpy(b_accel, mp->d_b_accel,*size*sizeof(realw),cudaMemcpyDeviceToHost);
+
+ realw maxnorm=0;
+
+ for(int i=0;i<*size;i++) {
+ maxnorm = MAX(maxnorm,fabsf(b_accel[i]));
+ }
+ printf("%d: maxnorm of backward accel = %e\n",*announceID,maxnorm);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// debugging helper: copies the backward velocity wavefield from the GPU
+// into 'b_veloc' and prints its maximum absolute value.
+extern "C"
+void FC_FUNC_(check_max_norm_b_veloc_gpu,
+ CHECK_MAX_NORM_B_VELOC_GPU)(int* size, realw* b_veloc,long* Mesh_pointer_f,int* announceID) {
+
+TRACE("check_max_norm_b_veloc_gpu");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ int n = *size;
+ cudaMemcpy(b_veloc, mp->d_b_veloc,n*sizeof(realw),cudaMemcpyDeviceToHost);
+
+ realw maxnorm = 0;
+ for(int i = 0; i < n; i++) {
+ realw absval = fabsf(b_veloc[i]);
+ if( absval > maxnorm ) maxnorm = absval;
+ }
+ printf("%d: maxnorm of backward veloc = %e\n",*announceID,maxnorm);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// debugging helper: prints the maximum absolute value of a backward
+// displacement array already resident on the host.
+extern "C"
+void FC_FUNC_(check_max_norm_b_displ,
+ CHECK_MAX_NORM_B_DISPL)(int* size, realw* b_displ,int* announceID) {
+
+TRACE("check_max_norm_b_displ");
+
+ int n = *size;
+ realw maxnorm = 0;
+
+ for(int i = 0; i < n; i++) {
+ realw absval = fabsf(b_displ[i]);
+ if( absval > maxnorm ) maxnorm = absval;
+ }
+ printf("%d:maxnorm of backward displ = %e\n",*announceID,maxnorm);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// debugging helper: prints the maximum absolute value of a backward
+// acceleration array already resident on the host.
+extern "C"
+void FC_FUNC_(check_max_norm_b_accel,
+ CHECK_MAX_NORM_B_ACCEL)(int* size, realw* b_accel,int* announceID) {
+
+TRACE("check_max_norm_b_accel");
+
+ realw maxnorm=0;
+
+ for(int i=0;i<*size;i++) {
+ maxnorm = MAX(maxnorm,fabsf(b_accel[i]));
+ }
+ printf("%d:maxnorm of backward accel = %e\n",*announceID,maxnorm);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// debugging helper: compares two host vectors entry by entry and prints
+// the relative L2 error (|v1-v2|^2 / |v1|^2), the largest absolute
+// difference and its location, plus a few entries around that location.
+//   sizef            - number of entries in each vector
+//   vector1, vector2 - host arrays to compare (vector1 is the reference)
+extern "C"
+void FC_FUNC_(check_error_vectors,
+ CHECK_ERROR_VECTORS)(int* sizef, realw* vector1,realw* vector2) {
+
+TRACE("check_error_vectors");
+
+ int size = *sizef;
+
+ double diff2 = 0;
+ double sum = 0;
+ double temp;
+ double maxerr=0;
+ int maxerrorloc = 0; // bug fix: initialize; was read uninitialized when all differences are zero
+
+ for(int i=0;i<size;++i) {
+ temp = vector1[i]-vector2[i];
+ diff2 += temp*temp;
+ sum += vector1[i]*vector1[i];
+ // bug fix: use fabs() on doubles throughout; the original mixed fabsf()
+ // (float, loses precision) in the test with abs() (int in C) in the update
+ if(maxerr < fabs(temp)) {
+ maxerr = fabs(temp);
+ maxerrorloc = i;
+ }
+ }
+
+ // bug fix: guard against division by zero when the reference vector is all zeros
+ printf("rel error = %f, maxerr = %e @ %d\n",(sum > 0.0 ? diff2/sum : diff2),maxerr,maxerrorloc);
+ int myrank;
+#ifdef WITH_MPI
+ MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
+#else
+ myrank = 0;
+#endif
+ if(myrank == 0) {
+ // bug fix: clamp at index 0 so maxerrorloc < 4 cannot read before the arrays
+ for(int i=maxerrorloc;i>maxerrorloc-5 && i>=0;i--) {
+ printf("[%d]: %e vs. %e\n",i,vector1[i],vector2[i]);
+ }
+ }
+
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Auxiliary functions
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// debugging helper: copies the forward acceleration array from the GPU and
+// prints its maximum value, tagged with the time step 'itf' and MPI rank.
+//   itf   - current time step (for the printout only)
+//   sizef - number of realw entries to copy from d_accel
+// NOTE(review): this takes the plain maximum, not the absolute value, so a
+// large negative acceleration would go unreported — confirm whether fabsf()
+// was intended, as in the check_max_norm_* routines above.
+extern "C"
+void FC_FUNC_(get_max_accel,
+ GET_MAX_ACCEL)(int* itf,int* sizef,long* Mesh_pointer) {
+
+TRACE("get_max_accel");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer);
+ int procid;
+#ifdef WITH_MPI
+ MPI_Comm_rank(MPI_COMM_WORLD,&procid);
+#else
+ procid = 0;
+#endif
+ int size = *sizef;
+ int it = *itf;
+ realw* accel_cpy = (realw*)malloc(size*sizeof(realw));
+ cudaMemcpy(accel_cpy,mp->d_accel,size*sizeof(realw),cudaMemcpyDeviceToHost);
+ realw maxval=0;
+ for(int i=0;i<size;++i) {
+ maxval = MAX(maxval,accel_cpy[i]);
+ }
+ printf("%d/%d: max=%e\n",it,procid,maxval);
+ free(accel_cpy);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// ACOUSTIC simulations
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// device kernel: block-wise maximum of |array[i]| using a shared-memory
+// tree reduction; each block writes one partial maximum to d_max[blockIdx.x],
+// and the host finishes the reduction over blocks.
+// note: the s>>=1 halving loop requires blockDim.x to be a power of two;
+// all launch sites in this file use a fixed blocksize of 256.
+__global__ void get_maximum_kernel(realw* array, int size, realw* d_max){
+
+ /* simplest version: uses only 1 thread
+ realw max;
+ max = 0;
+ // finds maximum value in array
+ if( size > 0 ){
+ max = abs(array[0]);
+ for( int i=1; i < size; i++){
+ if( abs(array[i]) > max ) max = abs(array[i]);
+ }
+ }
+ *d_max = max;
+ */
+
+ // reduction example:
+ __shared__ realw sdata[256] ;
+
+ // load shared mem
+ unsigned int tid = threadIdx.x;
+ unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
+
+ // loads absolute values into shared memory
+ // (threads past the end of the array contribute 0, the identity for max)
+ sdata[tid] = (i < size) ? fabs(array[i]) : 0.0 ;
+
+ __syncthreads();
+
+ // do reduction in shared mem
+ for(unsigned int s=blockDim.x/2; s>0; s>>=1)
+ {
+ if (tid < s){
+ // summation:
+ //sdata[tid] += sdata[tid + s];
+ // maximum:
+ if( sdata[tid] < sdata[tid + s] ) sdata[tid] = sdata[tid + s];
+ }
+ __syncthreads();
+ }
+
+ // write result for this block to global mem
+ if (tid == 0) d_max[blockIdx.x] = sdata[0];
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// computes the maximum absolute value of the acoustic potential wavefield
+// on the GPU and returns it in 'norm'.
+//   SIMULATION_TYPE == 1: reduces d_potential_dot_dot_acoustic (forward field)
+//   SIMULATION_TYPE == 3: reduces d_b_potential_dot_dot_acoustic (backward field)
+// uses a block-wise reduction kernel on the device, then finishes the
+// reduction over the per-block partial maxima on the host.
+extern "C"
+void FC_FUNC_(get_norm_acoustic_from_device,
+ GET_NORM_ACOUSTIC_FROM_DEVICE)(realw* norm,
+ long* Mesh_pointer_f,
+ int* SIMULATION_TYPE) {
+
+TRACE("get_norm_acoustic_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+ realw max;
+ realw *d_max;
+
+ max = 0;
+
+ // launch simple reduction kernel
+ realw* h_max;
+ int blocksize = 256;
+
+ // bug fix: mp->NGLOB_AB/blocksize is integer division, so the former
+ // (int) ceil(mp->NGLOB_AB/blocksize) rounded DOWN: the last partial block
+ // of points was never reduced, and num_blocks_x was 0 whenever
+ // NGLOB_AB < blocksize; round up in integer arithmetic instead
+ int num_blocks_x = (mp->NGLOB_AB + blocksize - 1)/blocksize;
+
+ h_max = (realw*) calloc(num_blocks_x,sizeof(realw));
+ cudaMalloc((void**)&d_max,num_blocks_x*sizeof(realw));
+
+ dim3 grid(num_blocks_x,1);
+ dim3 threads(blocksize,1,1);
+
+ if(*SIMULATION_TYPE == 1 ){
+ get_maximum_kernel<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
+ mp->NGLOB_AB,
+ d_max);
+ }
+
+ if(*SIMULATION_TYPE == 3 ){
+ get_maximum_kernel<<<grid,threads>>>(mp->d_b_potential_dot_dot_acoustic,
+ mp->NGLOB_AB,
+ d_max);
+ }
+
+ print_CUDA_error_if_any(cudaMemcpy(h_max,d_max,num_blocks_x*sizeof(realw),cudaMemcpyDeviceToHost),222);
+
+ // determines max over all block partial results
+ max = h_max[0];
+ for(int i=1;i<num_blocks_x;i++) {
+ if( max < h_max[i]) max = h_max[i];
+ }
+
+ cudaFree(d_max);
+ free(h_max);
+
+ // return result
+ *norm = max;
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("after get_norm_acoustic_from_device");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// ELASTIC simulations
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// device kernel: block-wise maximum of the Euclidean norm of 3-component
+// vectors stored interleaved as (x,y,z) triplets in 'array'; 'size' counts
+// vectors, not scalars. Each block writes one partial maximum to
+// d_max[blockIdx.x]; the host finishes the reduction over blocks.
+// note: the s>>=1 halving loop requires blockDim.x to be a power of two;
+// all launch sites in this file use a fixed blocksize of 256.
+__global__ void get_maximum_vector_kernel(realw* array, int size, realw* d_max){
+
+ // reduction example:
+ __shared__ realw sdata[256] ;
+
+ // load shared mem
+ unsigned int tid = threadIdx.x;
+ unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
+
+ // loads values into shared memory: assume array is a vector array
+ // (threads past the end contribute 0, the identity for max)
+ sdata[tid] = (i < size) ? sqrt(array[i*3]*array[i*3]
+ + array[i*3+1]*array[i*3+1]
+ + array[i*3+2]*array[i*3+2]) : 0.0 ;
+
+ __syncthreads();
+
+ // do reduction in shared mem
+ for(unsigned int s=blockDim.x/2; s>0; s>>=1)
+ {
+ if (tid < s){
+ // summation:
+ //sdata[tid] += sdata[tid + s];
+ // maximum:
+ if( sdata[tid] < sdata[tid + s] ) sdata[tid] = sdata[tid + s];
+ }
+ __syncthreads();
+ }
+
+ // write result for this block to global mem
+ if (tid == 0) d_max[blockIdx.x] = sdata[0];
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// computes the maximum vector norm of the elastic displacement wavefield
+// on the GPU and returns it in 'norm'.
+//   SIMULATION_TYPE == 1: reduces d_displ (forward field)
+//   SIMULATION_TYPE == 3: reduces d_b_displ (backward field)
+// uses a block-wise reduction kernel on the device, then finishes the
+// reduction over the per-block partial maxima on the host.
+extern "C"
+void FC_FUNC_(get_norm_elastic_from_device,
+ GET_NORM_ELASTIC_FROM_DEVICE)(realw* norm,
+ long* Mesh_pointer_f,
+ int* SIMULATION_TYPE) {
+
+ TRACE("get_norm_elastic_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+ realw max;
+ realw *d_max;
+
+ max = 0;
+
+ // launch simple reduction kernel
+ realw* h_max;
+ int blocksize = 256;
+
+ // bug fix: mp->NGLOB_AB/blocksize is integer division, so the former
+ // (int) ceil(mp->NGLOB_AB/blocksize) rounded DOWN: the last partial block
+ // of points was never reduced, and num_blocks_x was 0 whenever
+ // NGLOB_AB < blocksize; round up in integer arithmetic instead
+ int num_blocks_x = (mp->NGLOB_AB + blocksize - 1)/blocksize;
+
+ h_max = (realw*) calloc(num_blocks_x,sizeof(realw));
+ cudaMalloc((void**)&d_max,num_blocks_x*sizeof(realw));
+
+ dim3 grid(num_blocks_x,1);
+ dim3 threads(blocksize,1,1);
+
+ if(*SIMULATION_TYPE == 1 ){
+ get_maximum_vector_kernel<<<grid,threads>>>(mp->d_displ,
+ mp->NGLOB_AB,
+ d_max);
+ }
+
+ if(*SIMULATION_TYPE == 3 ){
+ get_maximum_vector_kernel<<<grid,threads>>>(mp->d_b_displ,
+ mp->NGLOB_AB,
+ d_max);
+ }
+
+ print_CUDA_error_if_any(cudaMemcpy(h_max,d_max,num_blocks_x*sizeof(realw),cudaMemcpyDeviceToHost),222);
+
+ // determines max over all block partial results
+ max = h_max[0];
+ for(int i=1;i<num_blocks_x;i++) {
+ if( max < h_max[i]) max = h_max[i];
+ }
+
+ cudaFree(d_max);
+ free(h_max);
+
+ // return result
+ *norm = max;
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("after get_norm_elastic_from_device");
+#endif
+}
+
+
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_add_sources_acoustic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_add_sources_acoustic_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_add_sources_acoustic_cuda.cu 2012-02-14 15:11:07 UTC (rev 19622)
@@ -0,0 +1,369 @@
+/*
+ !=====================================================================
+ !
+ ! S p e c f e m 3 D V e r s i o n 2 . 0
+ ! ---------------------------------------
+ !
+ ! Main authors: Dimitri Komatitsch and Jeroen Tromp
+ ! Princeton University, USA and University of Pau / CNRS / INRIA
+ ! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+ ! April 2011
+ !
+ ! This program is free software; you can redistribute it and/or modify
+ ! it under the terms of the GNU General Public License as published by
+ ! the Free Software Foundation; either version 2 of the License, or
+ ! (at your option) any later version.
+ !
+ ! This program is distributed in the hope that it will be useful,
+ ! but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ ! GNU General Public License for more details.
+ !
+ ! You should have received a copy of the GNU General Public License along
+ ! with this program; if not, write to the Free Software Foundation, Inc.,
+ ! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ !
+ !=====================================================================
+ */
+
+#include <stdio.h>
+#include <cuda.h>
+#include <cublas.h>
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// acoustic sources
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// device kernel: adds the acoustic point sources to potential_dot_dot_acoustic.
+// launched with one block (5x5x5 threads, one per GLL point) per source;
+// each block checks whether its source belongs to this MPI slice, lies in an
+// acoustic element, and matches the current phase (inner/outer elements).
+// the contribution uses only component 0 of sourcearrays and is scaled by
+// -stf/kappa (pressure source convention in an acoustic medium).
+__global__ void compute_add_sources_acoustic_kernel(realw* potential_dot_dot_acoustic,
+ int* ibool,
+ int* ispec_is_inner,
+ int phase_is_inner,
+ realw* sourcearrays,
+ double* stf_pre_compute,
+ int myrank,
+ int* islice_selected_source,
+ int* ispec_selected_source,
+ int* ispec_is_acoustic,
+ realw* kappastore,
+ int NSOURCES) {
+ int i = threadIdx.x;
+ int j = threadIdx.y;
+ int k = threadIdx.z;
+
+ // source index from the (possibly 2D) grid of blocks
+ int isource = blockIdx.x + gridDim.x*blockIdx.y; // bx
+
+ int ispec;
+ int iglob;
+ realw stf;
+ realw kappal;
+
+ // guard: the 2D grid may contain more blocks than sources
+ if( isource < NSOURCES ){
+
+ if(myrank == islice_selected_source[isource]) {
+
+ // -1 for Fortran -> C indexing
+ ispec = ispec_selected_source[isource]-1;
+
+ if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_acoustic[ispec] ) {
+
+ stf = (realw) stf_pre_compute[isource];
+ iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
+ kappal = kappastore[INDEX4(5,5,5,i,j,k,ispec)];
+
+ // atomic: GLL points shared between elements can be updated concurrently
+ atomicAdd(&potential_dot_dot_acoustic[iglob],
+ -sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 0, i,j,k)]*stf/kappal);
+
+ // potential_dot_dot_acoustic[iglob] +=
+ // -sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 0, i,j,k)]*stf/kappal;
+ }
+ }
+ }
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// host wrapper: adds acoustic point sources to the forward wavefield
+// d_potential_dot_dot_acoustic for the current phase (inner/outer elements).
+// copies the pre-computed source time function values to the GPU, then
+// launches one block per source; no-op when this slice holds no sources.
+extern "C"
+void FC_FUNC_(compute_add_sources_ac_cuda,
+ COMPUTE_ADD_SOURCES_AC_CUDA)(long* Mesh_pointer_f,
+ int* phase_is_innerf,
+ int* NSOURCESf,
+ int* SIMULATION_TYPEf,
+ double* h_stf_pre_compute,
+ int* myrankf) {
+
+TRACE("compute_add_sources_ac_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ // check if anything to do
+ if( mp->nsources_local == 0 ) return;
+
+ int phase_is_inner = *phase_is_innerf;
+ int NSOURCES = *NSOURCESf;
+ int myrank = *myrankf;
+
+ // split into a 2D grid when NSOURCES exceeds the 65535 x-dimension limit
+ int num_blocks_x = NSOURCES;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ // copies pre-computed source time factors onto GPU
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,
+ NSOURCES*sizeof(double),cudaMemcpyHostToDevice),18);
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(5,5,5);
+
+ compute_add_sources_acoustic_kernel<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
+ mp->d_ibool,
+ mp->d_ispec_is_inner,
+ phase_is_inner,
+ mp->d_sourcearrays,
+ mp->d_stf_pre_compute,
+ myrank,
+ mp->d_islice_selected_source,
+ mp->d_ispec_selected_source,
+ mp->d_ispec_is_acoustic,
+ mp->d_kappastore,
+ NSOURCES);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("compute_add_sources_ac_cuda");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// host wrapper: same as compute_add_sources_ac_cuda but adds the sources to
+// the backward/reconstructed wavefield d_b_potential_dot_dot_acoustic
+// (kernel simulations, SIMULATION_TYPE == 3).
+extern "C"
+void FC_FUNC_(compute_add_sources_ac_s3_cuda,
+ COMPUTE_ADD_SOURCES_AC_s3_CUDA)(long* Mesh_pointer_f,
+ int* phase_is_innerf,
+ int* NSOURCESf,
+ int* SIMULATION_TYPEf,
+ double* h_stf_pre_compute,
+ int* myrankf) {
+
+TRACE("compute_add_sources_ac_s3_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ // check if anything to do
+ if( mp->nsources_local == 0 ) return;
+
+ int phase_is_inner = *phase_is_innerf;
+ int NSOURCES = *NSOURCESf;
+ int myrank = *myrankf;
+
+ // split into a 2D grid when NSOURCES exceeds the 65535 x-dimension limit
+ int num_blocks_x = NSOURCES;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ // copies source time factors onto GPU
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,
+ NSOURCES*sizeof(double),cudaMemcpyHostToDevice),18);
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(5,5,5);
+
+ compute_add_sources_acoustic_kernel<<<grid,threads>>>(mp->d_b_potential_dot_dot_acoustic,
+ mp->d_ibool,
+ mp->d_ispec_is_inner,
+ phase_is_inner,
+ mp->d_sourcearrays,
+ mp->d_stf_pre_compute,
+ myrank,
+ mp->d_islice_selected_source,
+ mp->d_ispec_selected_source,
+ mp->d_ispec_is_acoustic,
+ mp->d_kappastore,
+ NSOURCES);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("compute_add_sources_ac_s3_cuda");
+#endif
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// acoustic adjoint sources
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// device kernel: adds acoustic adjoint sources (SIMULATION_TYPE 2 or 3) at
+// the receiver locations. launched with one block (5x5x5 threads) per local
+// adjoint receiver; pre_computed_irec maps the local receiver index to the
+// global receiver number. only component 0 of adj_sourcearrays is applied
+// (a pressure-type source; see the note below on the sign/kappa convention).
+__global__ void add_sources_ac_SIM_TYPE_2_OR_3_kernel(realw* potential_dot_dot_acoustic,
+ int nrec,
+ realw* adj_sourcearrays,
+ int* ibool,
+ int* ispec_is_inner,
+ int* ispec_is_acoustic,
+ int* ispec_selected_rec,
+ int phase_is_inner,
+ int* pre_computed_irec,
+ int nadj_rec_local,
+ realw* kappastore) {
+
+ int irec_local = blockIdx.x + gridDim.x*blockIdx.y;
+
+ // because of grid shape, irec_local can be too big
+ if(irec_local < nadj_rec_local) {
+
+ int irec = pre_computed_irec[irec_local];
+
+ // -1 for Fortran -> C indexing
+ int ispec = ispec_selected_rec[irec]-1;
+ if( ispec_is_acoustic[ispec] ){
+
+ // checks if element is in phase_is_inner run
+ if(ispec_is_inner[ispec] == phase_is_inner) {
+ int i = threadIdx.x;
+ int j = threadIdx.y;
+ int k = threadIdx.z;
+
+ int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
+
+ //kappal = kappastore[INDEX4(5,5,5,i,j,k,ispec)];
+
+ //potential_dot_dot_acoustic[iglob] += adj_sourcearrays[INDEX6(nadj_rec_local,NTSTEP_BETWEEN_ADJSRC,3,5,5,
+ // pre_computed_irec_local_index[irec],
+ // pre_computed_index,
+ // 0,
+ // i,j,k)]/kappal;
+
+ // beware, for acoustic medium, a pressure source would be taking the negative
+ // and divide by Kappa of the fluid;
+ // this would have to be done when constructing the adjoint source.
+ //
+ // note: we take the first component of the adj_sourcearrays
+ // the idea is to have e.g. a pressure source, where all 3 components would be the same
+ realw stf = adj_sourcearrays[INDEX5(5,5,5,3,i,j,k,0,irec_local)]; // / kappal
+
+ // atomic: GLL points shared between elements can be updated concurrently
+ atomicAdd(&potential_dot_dot_acoustic[iglob],stf);
+
+ //+adj_sourcearrays[INDEX6(nadj_rec_local,NTSTEP_BETWEEN_ADJSRC,3,5,5,
+ // pre_computed_irec_local_index[irec],pre_computed_index-1,
+ // 0,i,j,k)] // / kappal
+ // );
+ }
+ }
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+// host wrapper: adds acoustic adjoint sources for SIMULATION_TYPE 2 or 3.
+// extracts, on the host, the slice of the (very large) adjoint source array
+// belonging to this time step and this MPI rank's receivers, copies it to the
+// GPU, then launches one block per local adjoint receiver.
+//   time_index - 1-based Fortran time index into the adjoint source traces
+// NOTE(review): irec_local is incremented for every receiver owned by this
+// rank, but the slice is only filled for acoustic elements in the current
+// phase — entries for other receivers keep whatever the buffer previously
+// held; confirm the kernel's ispec checks make those entries harmless.
+extern "C"
+void FC_FUNC_(add_sources_ac_sim_2_or_3_cuda,
+ ADD_SOURCES_AC_SIM_2_OR_3_CUDA)(long* Mesh_pointer,
+ realw* h_adj_sourcearrays,
+ int* phase_is_inner,
+ int* h_ispec_is_inner,
+ int* h_ispec_is_acoustic,
+ int* h_ispec_selected_rec,
+ int* myrank,
+ int* nrec,
+ int* time_index,
+ int* h_islice_selected_rec,
+ int* nadj_rec_local,
+ int* NTSTEP_BETWEEN_READ_ADJSRC) {
+
+TRACE("add_sources_ac_sim_2_or_3_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+
+ // checks
+ if( *nadj_rec_local != mp->nadj_rec_local) exit_on_cuda_error("add_sources_ac_sim_type_2_or_3: nadj_rec_local not equal\n");
+
+ // make sure grid dimension is less than 65535 in x dimension
+ int num_blocks_x = mp->nadj_rec_local;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y,1);
+ dim3 threads(5,5,5);
+
+ // build slice of adj_sourcearrays because full array is *very* large.
+ // note: this extracts array values for local adjoint sources at given time step "time_index"
+ // from large adj_sourcearrays array into h_adj_sourcearrays_slice
+ int ispec,i,j,k;
+ int irec_local = 0;
+ for(int irec = 0; irec < *nrec; irec++) {
+ if(*myrank == h_islice_selected_rec[irec]) {
+ irec_local++;
+
+ // takes only acoustic sources
+ ispec = h_ispec_selected_rec[irec]-1;
+ if( h_ispec_is_acoustic[ispec] ){
+
+ if( h_ispec_is_inner[ispec] == *phase_is_inner) {
+ // copies all 3 components for the 5x5x5 GLL points of this receiver's element
+ for(k=0;k<5;k++) {
+ for(j=0;j<5;j++) {
+ for(i=0;i<5;i++) {
+ mp->h_adj_sourcearrays_slice[INDEX5(5,5,5,3,i,j,k,0,irec_local-1)]
+ = h_adj_sourcearrays[INDEX6(mp->nadj_rec_local,
+ *NTSTEP_BETWEEN_READ_ADJSRC,
+ 3,5,5,
+ irec_local-1,(*time_index)-1,
+ 0,i,j,k)];
+
+ mp->h_adj_sourcearrays_slice[INDEX5(5,5,5,3,i,j,k,1,irec_local-1)]
+ = h_adj_sourcearrays[INDEX6(mp->nadj_rec_local,
+ *NTSTEP_BETWEEN_READ_ADJSRC,
+ 3,5,5,
+ irec_local-1,(*time_index)-1,
+ 1,i,j,k)];
+
+ mp->h_adj_sourcearrays_slice[INDEX5(5,5,5,3,i,j,k,2,irec_local-1)]
+ = h_adj_sourcearrays[INDEX6(mp->nadj_rec_local,
+ *NTSTEP_BETWEEN_READ_ADJSRC,
+ 3,5,5,
+ irec_local-1,(*time_index)-1,
+ 2,i,j,k)];
+ }
+ }
+ }
+ } // phase_is_inner
+ } // h_ispec_is_acoustic
+ }
+ }
+ // check all local sources were added
+ if( irec_local != mp->nadj_rec_local) exit_on_error("irec_local not equal to nadj_rec_local\n");
+
+ // copies extracted array values onto GPU
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_adj_sourcearrays, mp->h_adj_sourcearrays_slice,
+ (mp->nadj_rec_local)*3*NGLL3*sizeof(realw),cudaMemcpyHostToDevice),99099);
+
+ // launches cuda kernel for acoustic adjoint sources
+ add_sources_ac_SIM_TYPE_2_OR_3_kernel<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
+ *nrec,
+ mp->d_adj_sourcearrays,
+ mp->d_ibool,
+ mp->d_ispec_is_inner,
+ mp->d_ispec_is_acoustic,
+ mp->d_ispec_selected_rec,
+ *phase_is_inner,
+ mp->d_pre_computed_irec,
+ mp->nadj_rec_local,
+ mp->d_kappastore);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("add_sources_acoustic_SIM_TYPE_2_OR_3_kernel");
+#endif
+}
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_add_sources_elastic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_add_sources_elastic_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_add_sources_elastic_cuda.cu 2012-02-14 15:11:07 UTC (rev 19622)
@@ -0,0 +1,421 @@
+/*
+ !=====================================================================
+ !
+ ! S p e c f e m 3 D V e r s i o n 2 . 0
+ ! ---------------------------------------
+ !
+ ! Main authors: Dimitri Komatitsch and Jeroen Tromp
+ ! Princeton University, USA and University of Pau / CNRS / INRIA
+ ! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+ ! April 2011
+ !
+ ! This program is free software; you can redistribute it and/or modify
+ ! it under the terms of the GNU General Public License as published by
+ ! the Free Software Foundation; either version 2 of the License, or
+ ! (at your option) any later version.
+ !
+ ! This program is distributed in the hope that it will be useful,
+ ! but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ ! GNU General Public License for more details.
+ !
+ ! You should have received a copy of the GNU General Public License along
+ ! with this program; if not, write to the Free Software Foundation, Inc.,
+ ! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ !
+ !=====================================================================
+ */
+
+#include <stdio.h>
+#include <cuda.h>
+#include <cublas.h>
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// elastic domain sources
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// device kernel: adds the elastic point sources to the acceleration field.
+// launched with one block (5x5x5 threads, one per GLL point) per source;
+// each block checks whether its source belongs to this MPI slice, lies in an
+// elastic element, and matches the current phase (inner/outer elements).
+// all 3 components of sourcearrays are added, scaled by the source time
+// function value stf_pre_compute[isource].
+__global__ void compute_add_sources_kernel(realw* accel,
+ int* ibool,
+ int* ispec_is_inner,
+ int phase_is_inner,
+ realw* sourcearrays,
+ double* stf_pre_compute,
+ int myrank,
+ int* islice_selected_source,
+ int* ispec_selected_source,
+ int* ispec_is_elastic,
+ int NSOURCES) {
+ int i = threadIdx.x;
+ int j = threadIdx.y;
+ int k = threadIdx.z;
+
+ // source index from the (possibly 2D) grid of blocks
+ int isource = blockIdx.x + gridDim.x*blockIdx.y; // bx
+ int ispec;
+ int iglob;
+ realw stf;
+
+ if(isource < NSOURCES) { // when NSOURCES > 65535, but mod(nspec_top,2) > 0, we end up with an extra block.
+
+ if(myrank == islice_selected_source[isource]) {
+
+ // -1 for Fortran -> C indexing
+ ispec = ispec_selected_source[isource]-1;
+
+ if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_elastic[ispec] ) {
+
+ stf = (realw) stf_pre_compute[isource];
+ iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
+
+ // atomic: GLL points shared between elements can be updated concurrently
+ atomicAdd(&accel[iglob*3],
+ sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 0, i,j,k)]*stf);
+ atomicAdd(&accel[iglob*3+1],
+ sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 1, i,j,k)]*stf);
+ atomicAdd(&accel[iglob*3+2],
+ sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 2, i,j,k)]*stf);
+ }
+ }
+ }
+
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// host wrapper: adds elastic point sources to the forward acceleration
+// d_accel for the current phase (inner/outer elements). copies the
+// pre-computed source time function values to the GPU, then launches one
+// block per source; no-op when this slice holds no sources.
+extern "C"
+void FC_FUNC_(compute_add_sources_el_cuda,
+ COMPUTE_ADD_SOURCES_EL_CUDA)(long* Mesh_pointer_f,
+ int* phase_is_innerf,
+ int* NSOURCESf,
+ double* h_stf_pre_compute,
+ int* myrankf) {
+
+TRACE("compute_add_sources_el_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ // check if anything to do
+ if( mp->nsources_local == 0 ) return;
+
+ int phase_is_inner = *phase_is_innerf;
+ int NSOURCES = *NSOURCESf;
+ int myrank = *myrankf;
+
+ // split into a 2D grid when NSOURCES exceeds the 65535 x-dimension limit
+ int num_blocks_x = NSOURCES;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ //double* d_stf_pre_compute;
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,
+ NSOURCES*sizeof(double),cudaMemcpyHostToDevice),18);
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(5,5,5);
+
+ compute_add_sources_kernel<<<grid,threads>>>(mp->d_accel,
+ mp->d_ibool,
+ mp->d_ispec_is_inner,
+ phase_is_inner,
+ mp->d_sourcearrays,
+ mp->d_stf_pre_compute,
+ myrank,
+ mp->d_islice_selected_source,
+ mp->d_ispec_selected_source,
+ mp->d_ispec_is_elastic,
+ NSOURCES);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("compute_add_sources_kernel");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// host wrapper: adds elastic point sources to the backward/reconstructed
+// acceleration d_b_accel (kernel simulations, SIMULATION_TYPE == 3).
+// same launch scheme as compute_add_sources_el_cuda: one 5x5x5 block per
+// source, split into a 2D grid when NSOURCES exceeds the 65535 limit.
+extern "C"
+void FC_FUNC_(compute_add_sources_el_s3_cuda,
+ COMPUTE_ADD_SOURCES_EL_S3_CUDA)(long* Mesh_pointer,
+ double* h_stf_pre_compute,
+ int* NSOURCESf,
+ int* phase_is_inner,
+ int* myrank) {
+ TRACE("compute_add_sources_el_s3_cuda");
+ // EPIK_TRACER("compute_add_sources_el_s3_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+
+ // check if anything to do; added for consistency with the other source
+ // launchers in this file — skips the memcpy and kernel launch when this
+ // MPI slice holds no sources (the kernel would add nothing anyway)
+ if( mp->nsources_local == 0 ) return;
+
+ int NSOURCES = *NSOURCESf;
+
+ // copies pre-computed source time factors onto GPU
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,
+ NSOURCES*sizeof(double),cudaMemcpyHostToDevice),18);
+
+ // split into a 2D grid when NSOURCES exceeds the 65535 x-dimension limit
+ int num_blocks_x = NSOURCES;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(5,5,5);
+
+ compute_add_sources_kernel<<<grid,threads>>>(mp->d_b_accel,mp->d_ibool,
+ mp->d_ispec_is_inner, *phase_is_inner,
+ mp->d_sourcearrays,
+ mp->d_stf_pre_compute,
+ *myrank,
+ mp->d_islice_selected_source,mp->d_ispec_selected_source,
+ mp->d_ispec_is_elastic,
+ NSOURCES);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("compute_add_sources_el_s3_cuda");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// NOISE sources
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Adds the noise source at the master receiver element: each of the NGLL3 threads
+// handles one GLL point of the element selected by ispec_selected_rec, reading the
+// 3-component source value for time step 'it' from noise_sourcearray.
+// irec_master_noise and the ibool/ispec values are Fortran 1-based, hence the "-1"s.
+__global__ void add_source_master_rec_noise_cuda_kernel(int* ibool,
+ int* ispec_selected_rec,
+ int irec_master_noise,
+ realw* accel,
+ realw* noise_sourcearray,
+ int it) {
+ int tx = threadIdx.x;
+ int iglob = ibool[tx + NGLL3*(ispec_selected_rec[irec_master_noise-1]-1)]-1;
+
+ // not sure if we need atomic operations but just in case...
+ // accel[3*iglob] += noise_sourcearray[3*tx + 3*125*it];
+ // accel[1+3*iglob] += noise_sourcearray[1+3*tx + 3*125*it];
+ // accel[2+3*iglob] += noise_sourcearray[2+3*tx + 3*125*it];
+
+ // atomics guard against two GLL points of the element mapping to the same iglob
+ atomicAdd(&accel[iglob*3],noise_sourcearray[3*tx + 3*NGLL3*it]);
+ atomicAdd(&accel[iglob*3+1],noise_sourcearray[1+3*tx + 3*NGLL3*it]);
+ atomicAdd(&accel[iglob*3+2],noise_sourcearray[2+3*tx + 3*NGLL3*it]);
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Host wrapper: launches the master-receiver noise source kernel, but only on the
+// MPI rank that owns the master receiver (checked against islice_selected_rec).
+// Called from Fortran; scalar arguments arrive as pointers.
+extern "C"
+void FC_FUNC_(add_source_master_rec_noise_cu,
+ ADD_SOURCE_MASTER_REC_NOISE_CU)(long* Mesh_pointer_f,
+ int* myrank_f,
+ int* it_f,
+ int* irec_master_noise_f,
+ int* islice_selected_rec) {
+
+TRACE("add_source_master_rec_noise_cu");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ int it = *it_f-1; // -1 for Fortran -> C indexing differences
+ int irec_master_noise = *irec_master_noise_f;
+ int myrank = *myrank_f;
+
+ // a single block: one thread per GLL point of the receiver element
+ dim3 grid(1,1,1);
+ dim3 threads(NGLL3,1,1);
+
+ if(myrank == islice_selected_rec[irec_master_noise-1]) {
+ add_source_master_rec_noise_cuda_kernel<<<grid,threads>>>(mp->d_ibool,
+ mp->d_ispec_selected_rec,
+ irec_master_noise,
+ mp->d_accel,
+ mp->d_noise_sourcearray,
+ it);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("add_source_master_rec_noise_cuda_kernel");
+#endif
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// ADJOINT sources
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Accumulates adjoint sources into the acceleration field: one thread block per
+// local adjoint receiver (5x5x5 threads, one per GLL point of its element).
+// pre_computed_irec maps the local receiver index back to the global receiver
+// number, because that mapping is built sequentially on the host and cannot be
+// reproduced inside a parallel kernel.
+__global__ void add_sources_el_SIM_TYPE_2_OR_3_kernel(realw* accel,
+ int nrec,
+ realw* adj_sourcearrays,
+ int* ibool,
+ int* ispec_is_inner,
+ int* ispec_is_elastic,
+ int* ispec_selected_rec,
+ int phase_is_inner,
+ int* pre_computed_irec,
+ int nadj_rec_local) {
+
+ int irec_local = blockIdx.x + gridDim.x*blockIdx.y;
+
+ if(irec_local < nadj_rec_local) { // when nrec > 65535, but mod(nspec_top,2) > 0, we end up with an extra block.
+
+ int irec = pre_computed_irec[irec_local];
+
+ // "-1": Fortran -> C index conversion
+ int ispec = ispec_selected_rec[irec]-1;
+ if( ispec_is_elastic[ispec] ){
+
+ // only the elements of the current phase (inner/outer) are updated
+ if(ispec_is_inner[ispec] == phase_is_inner) {
+ int i = threadIdx.x;
+ int j = threadIdx.y;
+ int k = threadIdx.z;
+ int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
+
+ // atomic operations are absolutely necessary for correctness!
+ atomicAdd(&accel[3*iglob],adj_sourcearrays[INDEX5(5,5,5,3,
+ i,j,k,
+ 0,
+ irec_local)]);
+
+ atomicAdd(&accel[1+3*iglob], adj_sourcearrays[INDEX5(5,5,5,3,
+ i,j,k,
+ 1,
+ irec_local)]);
+
+ atomicAdd(&accel[2+3*iglob],adj_sourcearrays[INDEX5(5,5,5,3,
+ i,j,k,
+ 2,
+ irec_local)]);
+ }
+ } // ispec_is_elastic
+ }
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Adds adjoint sources (SIMULATION_TYPE == 2 or 3) on the GPU: extracts, for the
+// given time index, the local adjoint source values from the (very large) host
+// buffer into a per-time-step slice, copies the slice to the device, and launches
+// the accumulation kernel. Called from Fortran; scalar arguments are pointers.
+extern "C"
+void FC_FUNC_(add_sources_el_sim_type_2_or_3,
+ ADD_SOURCES_EL_SIM_TYPE_2_OR_3)(long* Mesh_pointer,
+ realw* h_adj_sourcearrays,
+ int* phase_is_inner,
+ int* h_ispec_is_inner,
+ int* h_ispec_is_elastic,
+ int* h_ispec_selected_rec,
+ int* myrank,
+ int* nrec,
+ int* time_index,
+ int* h_islice_selected_rec,
+ int* nadj_rec_local,
+ int* NTSTEP_BETWEEN_READ_ADJSRC) {
+
+TRACE("add_sources_el_sim_type_2_or_3");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+
+ // checks
+ if( *nadj_rec_local != mp->nadj_rec_local) exit_on_error("add_sources_el_sim_type_2_or_3: nadj_rec_local not equal\n");
+
+ // make sure grid dimension is less than 65535 in x dimension
+ int num_blocks_x = mp->nadj_rec_local;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y,1);
+ dim3 threads(5,5,5);
+
+ // build slice of adj_sourcearrays because full array is *very* large.
+ // note: this extracts array values for local adjoint sources at given time step "time_index"
+ // from large adj_sourcearrays array into h_adj_sourcearrays_slice
+ // NOTE(review): irec_local is advanced for every receiver owned by this rank,
+ // even when the element is not elastic or not in the current phase, so the
+ // slice slot may hold stale data in that case; the kernel applies the same
+ // elastic/phase checks and never reads those slots.
+ int ispec,i,j,k;
+ int irec_local = 0;
+ for(int irec = 0; irec < *nrec; irec++) {
+ if(*myrank == h_islice_selected_rec[irec]) {
+ irec_local++;
+
+ // takes only elastic sources
+ ispec = h_ispec_selected_rec[irec]-1;
+ if( h_ispec_is_elastic[ispec] ){
+
+ if( h_ispec_is_inner[ispec] == *phase_is_inner) {
+ for(k=0;k<5;k++) {
+ for(j=0;j<5;j++) {
+ for(i=0;i<5;i++) {
+
+ mp->h_adj_sourcearrays_slice[INDEX5(5,5,5,3,
+ i,j,k,0,
+ irec_local-1)]
+ = h_adj_sourcearrays[INDEX6(*nadj_rec_local,
+ *NTSTEP_BETWEEN_READ_ADJSRC,
+ 3,5,5,
+ irec_local-1,
+ *time_index-1,
+ 0,i,j,k)];
+
+ mp->h_adj_sourcearrays_slice[INDEX5(5,5,5,3,
+ i,j,k,1,
+ irec_local-1)]
+ = h_adj_sourcearrays[INDEX6(*nadj_rec_local,
+ *NTSTEP_BETWEEN_READ_ADJSRC,
+ 3,5,5,
+ irec_local-1,
+ *time_index-1,
+ 1,i,j,k)];
+
+ mp->h_adj_sourcearrays_slice[INDEX5(5,5,5,3,
+ i,j,k,2,
+ irec_local-1)]
+ = h_adj_sourcearrays[INDEX6(*nadj_rec_local,
+ *NTSTEP_BETWEEN_READ_ADJSRC,
+ 3,5,5,
+ irec_local-1,
+ *time_index-1,
+ 2,i,j,k)];
+ }
+ }
+ }
+ } // phase_is_inner
+ } // h_ispec_is_elastic
+ }
+ }
+ // check all local sources were added
+ if( irec_local != mp->nadj_rec_local) exit_on_error("irec_local not equal to nadj_rec_local\n");
+
+ // copies extracted array values onto GPU
+ // (return code checked, consistent with the other host->device copies in this file)
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_adj_sourcearrays, mp->h_adj_sourcearrays_slice,
+ (mp->nadj_rec_local)*3*NGLL3*sizeof(realw),cudaMemcpyHostToDevice),99099);
+
+
+ // the irec_local variable needs to be precomputed (as
+ // h_pre_comp..), because normally it is in the loop updating accel,
+ // and due to how it's incremented, it cannot be parallelized
+
+ add_sources_el_SIM_TYPE_2_OR_3_kernel<<<grid,threads>>>(mp->d_accel,
+ *nrec,
+ mp->d_adj_sourcearrays,
+ mp->d_ibool,
+ mp->d_ispec_is_inner,
+ mp->d_ispec_is_elastic,
+ mp->d_ispec_selected_rec,
+ *phase_is_inner,
+ mp->d_pre_computed_irec,
+ mp->nadj_rec_local);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("add_sources_SIM_TYPE_2_OR_3_kernel");
+#endif
+}
+
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_coupling_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_coupling_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_coupling_cuda.cu 2012-02-14 15:11:07 UTC (rev 19622)
@@ -0,0 +1,482 @@
+/*
+ !=====================================================================
+ !
+ ! S p e c f e m 3 D V e r s i o n 2 . 0
+ ! ---------------------------------------
+ !
+ ! Main authors: Dimitri Komatitsch and Jeroen Tromp
+ ! Princeton University, USA and University of Pau / CNRS / INRIA
+ ! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+ ! April 2011
+ !
+ ! This program is free software; you can redistribute it and/or modify
+ ! it under the terms of the GNU General Public License as published by
+ ! the Free Software Foundation; either version 2 of the License, or
+ ! (at your option) any later version.
+ !
+ ! This program is distributed in the hope that it will be useful,
+ ! but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ ! GNU General Public License for more details.
+ !
+ ! You should have received a copy of the GNU General Public License along
+ ! with this program; if not, write to the Free Software Foundation, Inc.,
+ ! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ !
+ !=====================================================================
+ */
+
+#include <stdio.h>
+#include <cuda.h>
+#include <cublas.h>
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// ACOUSTIC - ELASTIC coupling
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Elastic -> acoustic coupling: for each GLL point of each coupling face, adds
+// the normal component of the elastic displacement (times the weighted jacobian)
+// into the acoustic potential's second time derivative.
+// One block per face (grid folded into x,y), one thread per face GLL point (NGLL2).
+__global__ void compute_coupling_acoustic_el_kernel(realw* displ,
+ realw* potential_dot_dot_acoustic,
+ int num_coupling_ac_el_faces,
+ int* coupling_ac_el_ispec,
+ int* coupling_ac_el_ijk,
+ realw* coupling_ac_el_normal,
+ realw* coupling_ac_el_jacobian2Dw,
+ int* ibool,
+ int* ispec_is_inner,
+ int phase_is_inner) {
+
+ int igll = threadIdx.x;
+ int iface = blockIdx.x + gridDim.x*blockIdx.y;
+
+ int i,j,k,iglob,ispec;
+ realw displ_x,displ_y,displ_z,displ_n;
+ realw nx,ny,nz;
+ realw jacobianw;
+
+ // guard against the extra block introduced by the grid folding
+ if( iface < num_coupling_ac_el_faces){
+
+ // don't compute points outside NGLLSQUARE==NGLL2==25
+ // way 2: no further check needed since blocksize = 25
+ // if(igll<NGLL2) {
+
+ // "-1" from index values to convert from Fortran-> C indexing
+ ispec = coupling_ac_el_ispec[iface] - 1;
+
+ if(ispec_is_inner[ispec] == phase_is_inner ) {
+
+ i = coupling_ac_el_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)] - 1;
+ j = coupling_ac_el_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)] - 1;
+ k = coupling_ac_el_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)] - 1;
+ iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)] - 1;
+
+ // elastic displacement on global point
+ displ_x = displ[iglob*3] ; // (1,iglob)
+ displ_y = displ[iglob*3+1] ; // (2,iglob)
+ displ_z = displ[iglob*3+2] ; // (3,iglob)
+
+ // gets associated normal on GLL point
+ nx = coupling_ac_el_normal[INDEX3(NDIM,NGLL2,0,igll,iface)]; // (1,igll,iface)
+ ny = coupling_ac_el_normal[INDEX3(NDIM,NGLL2,1,igll,iface)]; // (2,igll,iface)
+ nz = coupling_ac_el_normal[INDEX3(NDIM,NGLL2,2,igll,iface)]; // (3,igll,iface)
+
+ // calculates displacement component along normal
+ // (normal points outwards of acoustic element)
+ displ_n = displ_x*nx + displ_y*ny + displ_z*nz;
+
+ // gets associated, weighted jacobian
+ jacobianw = coupling_ac_el_jacobian2Dw[INDEX2(NGLL2,igll,iface)];
+
+ // continuity of pressure and normal displacement on global point
+
+ // note: Newmark time scheme together with definition of scalar potential:
+ // pressure = - chi_dot_dot
+ // requires that this coupling term uses the updated displacement at time step [t+delta_t],
+ // which is done at the very beginning of the time loop
+ // (see e.g. Chaljub & Vilotte, Nissen-Meyer thesis...)
+ // it also means you have to calculate and update this here first before
+ // calculating the coupling on the elastic side for the acceleration...
+ // atomic add: several faces may share the same global point (edges/corners)
+ atomicAdd(&potential_dot_dot_acoustic[iglob],+ jacobianw*displ_n);
+
+ }
+ // }
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Host wrapper for the elastic -> acoustic coupling kernel: launches it once for
+// the forward wavefield and, for SIMULATION_TYPE == 3, once more for the
+// backward ("b_") wavefield. Called from Fortran; scalars arrive as pointers.
+extern "C"
+void FC_FUNC_(compute_coupling_ac_el_cuda,
+ COMPUTE_COUPLING_AC_EL_CUDA)(
+ long* Mesh_pointer_f,
+ int* phase_is_innerf,
+ int* num_coupling_ac_el_facesf,
+ int* SIMULATION_TYPEf) {
+ TRACE("compute_coupling_ac_el_cuda");
+ //double start_time = get_time();
+
+ // unpacks the Fortran-passed arguments
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+ int phase = *phase_is_innerf;
+ int nfaces = *num_coupling_ac_el_facesf;
+ int sim_type = *SIMULATION_TYPEf;
+
+ // one block per coupling face, one thread per face GLL point (NGLL2);
+ // fold the grid into (x,y) while x exceeds the 65535 per-dimension limit
+ int nbx = nfaces;
+ int nby = 1;
+ for( ; nbx > 65535; nby = nby*2 ){
+ nbx = (int) ceil(nbx*0.5f);
+ }
+
+ dim3 grid(nbx,nby);
+ dim3 threads(NGLL2,1,1);
+
+ // forward wavefield
+ compute_coupling_acoustic_el_kernel<<<grid,threads>>>(mp->d_displ,
+ mp->d_potential_dot_dot_acoustic,
+ nfaces,
+ mp->d_coupling_ac_el_ispec,
+ mp->d_coupling_ac_el_ijk,
+ mp->d_coupling_ac_el_normal,
+ mp->d_coupling_ac_el_jacobian2Dw,
+ mp->d_ibool,
+ mp->d_ispec_is_inner,
+ phase);
+
+ // adjoint simulations: same coupling applied to the backward wavefield
+ if( sim_type == 3 ){
+ compute_coupling_acoustic_el_kernel<<<grid,threads>>>(mp->d_b_displ,
+ mp->d_b_potential_dot_dot_acoustic,
+ nfaces,
+ mp->d_coupling_ac_el_ispec,
+ mp->d_coupling_ac_el_ijk,
+ mp->d_coupling_ac_el_normal,
+ mp->d_coupling_ac_el_jacobian2Dw,
+ mp->d_ibool,
+ mp->d_ispec_is_inner,
+ phase);
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ //double end_time = get_time();
+ //printf("Elapsed time: %e\n",end_time-start_time);
+ exit_on_cuda_error("compute_coupling_acoustic_el_kernel");
+#endif
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// ELASTIC - ACOUSTIC coupling
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Acoustic -> elastic coupling: for each GLL point of each coupling face, adds
+// the acoustic pressure (times face normal and weighted jacobian) onto the
+// elastic acceleration. With gravity, the pressure additionally includes the
+// rho * g * s_z term (flagged below as still under review in the original).
+// One block per face (grid folded into x,y), one thread per face GLL point.
+__global__ void compute_coupling_elastic_ac_kernel(realw* potential_dot_dot_acoustic,
+ realw* accel,
+ int num_coupling_ac_el_faces,
+ int* coupling_ac_el_ispec,
+ int* coupling_ac_el_ijk,
+ realw* coupling_ac_el_normal,
+ realw* coupling_ac_el_jacobian2Dw,
+ int* ibool,
+ int* ispec_is_inner,
+ int phase_is_inner,
+ int gravity,
+ realw* minus_g,
+ realw* rhostore,
+ realw* displ) {
+
+ int igll = threadIdx.x;
+ int iface = blockIdx.x + gridDim.x*blockIdx.y;
+
+ int i,j,k,iglob,ispec;
+ realw pressure;
+ realw nx,ny,nz;
+ realw jacobianw;
+ realw rhol;
+
+ // guard against the extra block introduced by the grid folding
+ if( iface < num_coupling_ac_el_faces){
+
+ // don't compute points outside NGLLSQUARE==NGLL2==25
+ // way 2: no further check needed since blocksize = 25
+ // if(igll<NGLL2) {
+
+ // "-1" from index values to convert from Fortran-> C indexing
+ ispec = coupling_ac_el_ispec[iface] - 1;
+
+ if(ispec_is_inner[ispec] == phase_is_inner ) {
+
+ i = coupling_ac_el_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)] - 1;
+ j = coupling_ac_el_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)] - 1;
+ k = coupling_ac_el_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)] - 1;
+ iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)] - 1;
+
+ // gets associated normal on GLL point
+ // note: normal points away from acoustic element
+ nx = coupling_ac_el_normal[INDEX3(NDIM,NGLL2,0,igll,iface)]; // (1,igll,iface)
+ ny = coupling_ac_el_normal[INDEX3(NDIM,NGLL2,1,igll,iface)]; // (2,igll,iface)
+ nz = coupling_ac_el_normal[INDEX3(NDIM,NGLL2,2,igll,iface)]; // (3,igll,iface)
+
+ // gets associated, weighted jacobian
+ jacobianw = coupling_ac_el_jacobian2Dw[INDEX2(NGLL2,igll,iface)];
+
+ // acoustic pressure on global point
+ if( gravity ){
+ // takes density (from acoustic? element)
+ rhol = rhostore[INDEX4_PADDED(NGLLX,NGLLX,NGLLX,i,j,k,ispec)];
+
+ // note: uses potential chi such that displacement s = grad(chi),
+ // pressure becomes: p = - kappa ( div( s ) ) = rho ( - dot_dot_chi + g * s )
+ // g only acting in negative z-direction
+
+ // daniel: TODO - check gravity and coupling would be displ * nz correct?
+ pressure = rhol*( - potential_dot_dot_acoustic[iglob]
+ + minus_g[iglob] * displ[iglob*3+2] );
+
+ //daniel: TODO - check gravity and coupling
+ //pressure = - potential_dot_dot_acoustic[iglob] ;
+ //if( iface == 128 && igll == 5 ){
+ // printf("coupling acoustic: %f %f \n",potential_dot_dot_acoustic[iglob],
+ // minus_g[iglob] * displ[iglob*3+2]);
+ //}
+
+ }else{
+ // no gravity: uses potential chi such that displacement s = 1/rho grad(chi)
+ // pressure p = - kappa ( div( s ) ) then becomes: p = - dot_dot_chi
+ // ( multiplied with factor 1/kappa due to setup of equation of motion )
+ pressure = - potential_dot_dot_acoustic[iglob];
+ }
+
+ // continuity of displacement and pressure on global point
+ //
+ // note: Newmark time scheme together with definition of scalar potential:
+ // pressure = - chi_dot_dot
+ // requires that this coupling term uses the *UPDATED* pressure (chi_dot_dot), i.e.
+ // pressure at time step [t + delta_t]
+ // (see e.g. Chaljub & Vilotte, Nissen-Meyer thesis...)
+ // it means you have to calculate and update the acoustic pressure first before
+ // calculating this term...
+ // atomic adds: several faces may share the same global point (edges/corners)
+ atomicAdd(&accel[iglob*3],+ jacobianw*nx*pressure);
+ atomicAdd(&accel[iglob*3+1],+ jacobianw*ny*pressure);
+ atomicAdd(&accel[iglob*3+2],+ jacobianw*nz*pressure);
+ }
+ // }
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Host wrapper for the acoustic -> elastic coupling kernel: launches it for the
+// forward wavefield and, for SIMULATION_TYPE == 3, for the backward ("b_")
+// wavefield as well. Called from Fortran; scalars arrive as pointers.
+extern "C"
+void FC_FUNC_(compute_coupling_el_ac_cuda,
+ COMPUTE_COUPLING_EL_AC_CUDA)(
+ long* Mesh_pointer_f,
+ int* phase_is_innerf,
+ int* num_coupling_ac_el_facesf,
+ int* SIMULATION_TYPEf) {
+ TRACE("compute_coupling_el_ac_cuda");
+ //double start_time = get_time();
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+ int phase_is_inner = *phase_is_innerf;
+ int num_coupling_ac_el_faces = *num_coupling_ac_el_facesf;
+ int SIMULATION_TYPE = *SIMULATION_TYPEf;
+
+ // way 1: exact blocksize to match NGLLSQUARE
+ // (uses the NGLL2 constant instead of a hard-coded 25, consistent with
+ // compute_coupling_ac_el_cuda and compute_coupling_ocean_cuda)
+ int blocksize = NGLL2;
+
+ // one block per coupling face; fold the grid into (x,y) when x would
+ // exceed the 65535 blocks-per-dimension limit
+ int num_blocks_x = num_coupling_ac_el_faces;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ // launches GPU kernel
+ compute_coupling_elastic_ac_kernel<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
+ mp->d_accel,
+ num_coupling_ac_el_faces,
+ mp->d_coupling_ac_el_ispec,
+ mp->d_coupling_ac_el_ijk,
+ mp->d_coupling_ac_el_normal,
+ mp->d_coupling_ac_el_jacobian2Dw,
+ mp->d_ibool,
+ mp->d_ispec_is_inner,
+ phase_is_inner,
+ mp->gravity,
+ mp->d_minus_g,
+ mp->d_rhostore,
+ mp->d_displ);
+
+ // adjoint simulations
+ if (SIMULATION_TYPE == 3 ){
+ compute_coupling_elastic_ac_kernel<<<grid,threads>>>(mp->d_b_potential_dot_dot_acoustic,
+ mp->d_b_accel,
+ num_coupling_ac_el_faces,
+ mp->d_coupling_ac_el_ispec,
+ mp->d_coupling_ac_el_ijk,
+ mp->d_coupling_ac_el_normal,
+ mp->d_coupling_ac_el_jacobian2Dw,
+ mp->d_ibool,
+ mp->d_ispec_is_inner,
+ phase_is_inner,
+ mp->gravity,
+ mp->d_minus_g,
+ mp->d_rhostore,
+ mp->d_b_displ);
+
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ //double end_time = get_time();
+ //printf("Elapsed time: %e\n",end_time-start_time);
+ exit_on_cuda_error("compute_coupling_el_ac_cuda");
+#endif
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+/* OCEANS load coupled on free surface */
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+// Applies the ocean load correction on the free surface: replaces the normal
+// component of the acceleration update so that it uses the ocean-loaded mass
+// matrix rmass_ocean_load instead of rmass. Each global point must be updated
+// exactly once, which is enforced with the updated_dof_ocean_load flag array.
+__global__ void compute_coupling_ocean_cuda_kernel(realw* accel,
+ realw* rmass,
+ realw* rmass_ocean_load,
+ int num_free_surface_faces,
+ int* free_surface_ispec,
+ int* free_surface_ijk,
+ realw* free_surface_normal,
+ int* ibool,
+ int* updated_dof_ocean_load) {
+ // gets spectral element face id
+ int igll = threadIdx.x ; // threadIdx.y*blockDim.x will be always = 0 for thread block (25,1,1)
+ int iface = blockIdx.x + gridDim.x*blockIdx.y;
+ realw nx,ny,nz;
+ realw force_normal_comp,additional_term;
+
+ // for all faces on free surface
+ if( iface < num_free_surface_faces ){
+
+ int ispec = free_surface_ispec[iface]-1;
+
+ // gets global point index ("-1": Fortran -> C indexing)
+ int i = free_surface_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)] - 1; // (1,igll,iface)
+ int j = free_surface_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)] - 1;
+ int k = free_surface_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)] - 1;
+
+ int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)] - 1;
+
+ //if(igll == 0 ) printf("igll %d %d %d %d\n",igll,i,j,k,iglob);
+
+ // only update this global point once
+
+ // daniel: TODO - there might be better ways to implement a mutex like below,
+ // and find a workaround to not use the temporary update array.
+ // atomicExch: returns the old value, i.e. 0 indicates that we still have to do this point
+
+ if( atomicExch(&updated_dof_ocean_load[iglob],1) == 0){
+
+ // get normal
+ nx = free_surface_normal[INDEX3(NDIM,NGLL2,0,igll,iface)]; //(1,igll,iface)
+ ny = free_surface_normal[INDEX3(NDIM,NGLL2,1,igll,iface)];
+ nz = free_surface_normal[INDEX3(NDIM,NGLL2,2,igll,iface)];
+
+ // make updated component of right-hand side
+ // we divide by rmass() which is 1 / M
+ // we use the total force which includes the Coriolis term above
+ force_normal_comp = ( accel[iglob*3]*nx + accel[iglob*3+1]*ny + accel[iglob*3+2]*nz ) / rmass[iglob];
+
+ additional_term = (rmass_ocean_load[iglob] - rmass[iglob]) * force_normal_comp;
+
+ // probably wouldn't need atomicAdd anymore, but just to be sure...
+ atomicAdd(&accel[iglob*3], + additional_term * nx);
+ atomicAdd(&accel[iglob*3+1], + additional_term * ny);
+ atomicAdd(&accel[iglob*3+2], + additional_term * nz);
+ }
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Host wrapper for the ocean load coupling: zeroes the once-per-point flag array,
+// then launches the kernel over all free-surface faces; for SIMULATION_TYPE == 3
+// the flags are reset and the backward ("b_") acceleration is corrected as well.
+extern "C"
+void FC_FUNC_(compute_coupling_ocean_cuda,
+ COMPUTE_COUPLING_OCEAN_CUDA)(long* Mesh_pointer_f,
+ int* SIMULATION_TYPE) {
+
+ TRACE("compute_coupling_ocean_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ // checks if anything to do
+ if( mp->num_free_surface_faces == 0 ) return;
+
+ // block sizes: exact blocksize to match NGLLSQUARE
+ int blocksize = NGLL2;
+
+ // one block per free-surface face; fold grid into (x,y) below the 65535 limit
+ int num_blocks_x = mp->num_free_surface_faces;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+
+ // initializes temporary array to zero
+ print_CUDA_error_if_any(cudaMemset(mp->d_updated_dof_ocean_load,0,
+ sizeof(int)*mp->NGLOB_AB),88501);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("before kernel compute_coupling_ocean_cuda");
+#endif
+
+ compute_coupling_ocean_cuda_kernel<<<grid,threads>>>(mp->d_accel,
+ mp->d_rmass,
+ mp->d_rmass_ocean_load,
+ mp->num_free_surface_faces,
+ mp->d_free_surface_ispec,
+ mp->d_free_surface_ijk,
+ mp->d_free_surface_normal,
+ mp->d_ibool,
+ mp->d_updated_dof_ocean_load);
+ // for backward/reconstructed potentials
+ if(*SIMULATION_TYPE == 3) {
+ // re-initializes array
+ print_CUDA_error_if_any(cudaMemset(mp->d_updated_dof_ocean_load,0,
+ sizeof(int)*mp->NGLOB_AB),88502);
+
+ compute_coupling_ocean_cuda_kernel<<<grid,threads>>>(mp->d_b_accel,
+ mp->d_rmass,
+ mp->d_rmass_ocean_load,
+ mp->num_free_surface_faces,
+ mp->d_free_surface_ispec,
+ mp->d_free_surface_ijk,
+ mp->d_free_surface_normal,
+ mp->d_ibool,
+ mp->d_updated_dof_ocean_load);
+
+ }
+
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("compute_coupling_ocean_cuda");
+#endif
+}
+
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_crust_mantle_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_crust_mantle_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_crust_mantle_cuda.cu 2012-02-14 15:11:07 UTC (rev 19622)
@@ -0,0 +1,1826 @@
+/*
+ !=====================================================================
+ !
+ ! S p e c f e m 3 D V e r s i o n 2 . 0
+ ! ---------------------------------------
+ !
+ ! Main authors: Dimitri Komatitsch and Jeroen Tromp
+ ! Princeton University, USA and University of Pau / CNRS / INRIA
+ ! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+ ! April 2011
+ !
+ ! This program is free software; you can redistribute it and/or modify
+ ! it under the terms of the GNU General Public License as published by
+ ! the Free Software Foundation; either version 2 of the License, or
+ ! (at your option) any later version.
+ !
+ ! This program is distributed in the hope that it will be useful,
+ ! but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ ! GNU General Public License for more details.
+ !
+ ! You should have received a copy of the GNU General Public License along
+ ! with this program; if not, write to the Free Software Foundation, Inc.,
+ ! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ !
+ !=====================================================================
+ */
+
+#include <stdio.h>
+
+#include <cuda.h>
+#include <cublas.h>
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+
+
+// cuda constant arrays
+// GLL derivative matrices (hprime), derivative matrices pre-multiplied by the
+// GLL weights (hprimewgll) and weight products (wgllwgll); they live in device
+// __constant__ memory and are filled once from the host by the setConst_*()
+// routines below.
+__constant__ realw d_hprime_xx[NGLL2];
+__constant__ realw d_hprime_yy[NGLL2];
+__constant__ realw d_hprime_zz[NGLL2];
+__constant__ realw d_hprimewgll_xx[NGLL2];
+__constant__ realw d_hprimewgll_yy[NGLL2];
+__constant__ realw d_hprimewgll_zz[NGLL2];
+__constant__ realw d_wgllwgll_xy[NGLL2];
+__constant__ realw d_wgllwgll_xz[NGLL2];
+__constant__ realw d_wgllwgll_yz[NGLL2];
+
+__constant__ realw d_wgll_cube[NGLL3]; // needed only for gravity case
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// CONSTANT arrays setup
+
+/* ----------------------------------------------------------------------------------------------- */
+
+/* note:
+ constant arrays when used in other compute_forces_***_cuda.cu routines stay zero,
+ constant declaration and cudaMemcpyToSymbol would have to be in the same file...
+
+ extern keyword doesn't work for __constant__ declarations.
+
+ also:
+ cudaMemcpyToSymbol("deviceCaseParams", caseParams, sizeof(CaseParams));
+ ..
+ and compile with -arch=sm_20
+
+ see also: http://stackoverflow.com/questions/4008031/how-to-use-cuda-constant-memory-in-a-programmer-pleasant-way
+ doesn't seem to work.
+
+ we could keep arrays separated for acoustic and elastic routines...
+
+ workaround:
+
+ for now, we store pointers with cudaGetSymbolAddress() function calls.
+ we pass those pointers in all other compute_forces_..() routines
+
+ in this file, we can use the above constant array declarations without need of the pointers.
+
+ */
+
+// constant arrays
+
+// Copies the host hprime_xx array into the device __constant__ array d_hprime_xx
+// and stores the symbol's device address in the mesh container, so that other
+// compilation units (which cannot see this __constant__ declaration) can use it.
+void setConst_hprime_xx(realw* array,Mesh* mp)
+{
+
+ cudaError_t err = cudaMemcpyToSymbol(d_hprime_xx, array, NGLL2*sizeof(realw));
+ if (err != cudaSuccess)
+ {
+ fprintf(stderr, "Error in setConst_hprime_xx: %s\n", cudaGetErrorString(err));
+ fprintf(stderr, "The problem is maybe -arch sm_13 instead of -arch sm_11 in the Makefile, please doublecheck\n");
+ exit(1);
+ }
+
+ // passes the symbol itself rather than its character-string name: the string
+ // form of cudaGetSymbolAddress() is deprecated and was removed in CUDA 5.0
+ err = cudaGetSymbolAddress((void**)&(mp->d_hprime_xx),d_hprime_xx);
+ if(err != cudaSuccess) {
+ fprintf(stderr, "Error with d_hprime_xx: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+}
+
+// Copies the host hprime_yy array into __constant__ memory and records the
+// symbol's device address in the mesh container (see setConst_hprime_xx).
+void setConst_hprime_yy(realw* array,Mesh* mp)
+{
+
+ cudaError_t err = cudaMemcpyToSymbol(d_hprime_yy, array, NGLL2*sizeof(realw));
+ if (err != cudaSuccess)
+ {
+ fprintf(stderr, "Error in setConst_hprime_yy: %s\n", cudaGetErrorString(err));
+ fprintf(stderr, "The problem is maybe -arch sm_13 instead of -arch sm_11 in the Makefile, please doublecheck\n");
+ exit(1);
+ }
+
+ // symbol instead of deprecated string name (removed in CUDA 5.0)
+ err = cudaGetSymbolAddress((void**)&(mp->d_hprime_yy),d_hprime_yy);
+ if(err != cudaSuccess) {
+ fprintf(stderr, "Error with d_hprime_yy: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+}
+
+// Copies the host hprime_zz array into __constant__ memory and records the
+// symbol's device address in the mesh container (see setConst_hprime_xx).
+void setConst_hprime_zz(realw* array,Mesh* mp)
+{
+
+ cudaError_t err = cudaMemcpyToSymbol(d_hprime_zz, array, NGLL2*sizeof(realw));
+ if (err != cudaSuccess)
+ {
+ fprintf(stderr, "Error in setConst_hprime_zz: %s\n", cudaGetErrorString(err));
+ fprintf(stderr, "The problem is maybe -arch sm_13 instead of -arch sm_11 in the Makefile, please doublecheck\n");
+ exit(1);
+ }
+
+ // symbol instead of deprecated string name (removed in CUDA 5.0)
+ err = cudaGetSymbolAddress((void**)&(mp->d_hprime_zz),d_hprime_zz);
+ if(err != cudaSuccess) {
+ fprintf(stderr, "Error with d_hprime_zz: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+}
+
+
+// Copies the host hprimewgll_xx array into __constant__ memory and records the
+// symbol's device address in the mesh container (see setConst_hprime_xx).
+void setConst_hprimewgll_xx(realw* array,Mesh* mp)
+{
+ cudaError_t err = cudaMemcpyToSymbol(d_hprimewgll_xx, array, NGLL2*sizeof(realw));
+ if (err != cudaSuccess)
+ {
+ fprintf(stderr, "Error in setConst_hprimewgll_xx: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+
+ // symbol instead of deprecated string name (removed in CUDA 5.0)
+ err = cudaGetSymbolAddress((void**)&(mp->d_hprimewgll_xx),d_hprimewgll_xx);
+ if(err != cudaSuccess) {
+ fprintf(stderr, "Error with d_hprimewgll_xx: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+}
+
+// Copies the host hprimewgll_yy array into __constant__ memory and records the
+// symbol's device address in the mesh container (see setConst_hprime_xx).
+void setConst_hprimewgll_yy(realw* array,Mesh* mp)
+{
+ cudaError_t err = cudaMemcpyToSymbol(d_hprimewgll_yy, array, NGLL2*sizeof(realw));
+ if (err != cudaSuccess)
+ {
+ fprintf(stderr, "Error in setConst_hprimewgll_yy: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+
+ // symbol instead of deprecated string name (removed in CUDA 5.0)
+ err = cudaGetSymbolAddress((void**)&(mp->d_hprimewgll_yy),d_hprimewgll_yy);
+ if(err != cudaSuccess) {
+ fprintf(stderr, "Error with d_hprimewgll_yy: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+}
+
+// Copies the host hprimewgll_zz array into __constant__ memory and records the
+// symbol's device address in the mesh container (see setConst_hprime_xx).
+void setConst_hprimewgll_zz(realw* array,Mesh* mp)
+{
+ cudaError_t err = cudaMemcpyToSymbol(d_hprimewgll_zz, array, NGLL2*sizeof(realw));
+ if (err != cudaSuccess)
+ {
+ fprintf(stderr, "Error in setConst_hprimewgll_zz: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+
+ // symbol instead of deprecated string name (removed in CUDA 5.0)
+ err = cudaGetSymbolAddress((void**)&(mp->d_hprimewgll_zz),d_hprimewgll_zz);
+ if(err != cudaSuccess) {
+ fprintf(stderr, "Error with d_hprimewgll_zz: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+}
+
+// Copies the host wgllwgll_xy weight array into __constant__ memory and records
+// the symbol's device address in the mesh container (see setConst_hprime_xx).
+void setConst_wgllwgll_xy(realw* array,Mesh* mp)
+{
+ cudaError_t err = cudaMemcpyToSymbol(d_wgllwgll_xy, array, NGLL2*sizeof(realw));
+ if (err != cudaSuccess)
+ {
+ fprintf(stderr, "Error in setConst_wgllwgll_xy: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+
+ // symbol instead of deprecated string name (removed in CUDA 5.0)
+ err = cudaGetSymbolAddress((void**)&(mp->d_wgllwgll_xy),d_wgllwgll_xy);
+ if(err != cudaSuccess) {
+ fprintf(stderr, "Error with d_wgllwgll_xy: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+
+}
+
+// Copies the host wgllwgll_xz weight array into __constant__ memory and records
+// the symbol's device address in the mesh container (see setConst_hprime_xx).
+void setConst_wgllwgll_xz(realw* array,Mesh* mp)
+{
+ cudaError_t err = cudaMemcpyToSymbol(d_wgllwgll_xz, array, NGLL2*sizeof(realw));
+ if (err != cudaSuccess)
+ {
+ fprintf(stderr, "Error in setConst_wgllwgll_xz: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+
+ // symbol instead of deprecated string name (removed in CUDA 5.0)
+ err = cudaGetSymbolAddress((void**)&(mp->d_wgllwgll_xz),d_wgllwgll_xz);
+ if(err != cudaSuccess) {
+ fprintf(stderr, "Error with d_wgllwgll_xz: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+
+}
+
+// Copies the host wgllwgll_yz weight array into __constant__ memory and records
+// the symbol's device address in the mesh container (see setConst_hprime_xx).
+void setConst_wgllwgll_yz(realw* array,Mesh* mp)
+{
+ cudaError_t err = cudaMemcpyToSymbol(d_wgllwgll_yz, array, NGLL2*sizeof(realw));
+ if (err != cudaSuccess)
+ {
+ fprintf(stderr, "Error in setConst_wgllwgll_yz: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+
+ // symbol instead of deprecated string name (removed in CUDA 5.0)
+ err = cudaGetSymbolAddress((void**)&(mp->d_wgllwgll_yz),d_wgllwgll_yz);
+ if(err != cudaSuccess) {
+ fprintf(stderr, "Error with d_wgllwgll_yz: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+
+}
+
+// Copies the host wgll_cube weight array (NGLL3 entries, used only for the
+// gravity case) into __constant__ memory and records the symbol's device
+// address in the mesh container (see setConst_hprime_xx).
+void setConst_wgll_cube(realw* array,Mesh* mp)
+{
+ cudaError_t err = cudaMemcpyToSymbol(d_wgll_cube, array, NGLL3*sizeof(realw));
+ if (err != cudaSuccess)
+ {
+ fprintf(stderr, "Error in setConst_wgll_cube: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+
+ // symbol instead of deprecated string name (removed in CUDA 5.0)
+ err = cudaGetSymbolAddress((void**)&(mp->d_wgll_cube),d_wgll_cube);
+ if(err != cudaSuccess) {
+ fprintf(stderr, "Error with d_wgll_cube: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// elemental routines
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// updates stress
+
+__device__ void compute_element_cm_att_stress(int tx,int working_element,
+                                              realw* R_xx,
+                                              realw* R_yy,
+                                              realw* R_xy,
+                                              realw* R_xz,
+                                              realw* R_yz,
+                                              reald* sigma_xx,
+                                              reald* sigma_yy,
+                                              reald* sigma_zz,
+                                              reald* sigma_xy,
+                                              reald* sigma_xz,
+                                              reald* sigma_yz) {
+
+  // subtracts the memory-variable contribution of every standard linear
+  // solid from the deviatoric stress components (attenuation correction);
+  // since the memory variables are trace-free, the zz component gains
+  // back the sum of the xx and yy contributions
+  for(int i_sls = 0; i_sls < N_SLS; i_sls++){
+    // index for R_xx,... ordered as (i_sls,i,j,k,ispec)
+    // (the local CPU version instead uses (i,j,k,ispec,i_sls):
+    //  offset_sls = tx + NGLL3*(working_element + NSPEC*i_sls) )
+    int offset_sls = i_sls + N_SLS*(tx + NGLL3*working_element);
+
+    reald rxx = R_xx[offset_sls];
+    reald ryy = R_yy[offset_sls];
+
+    *sigma_xx -= rxx;
+    *sigma_yy -= ryy;
+    *sigma_zz = *sigma_zz + rxx + ryy;
+    *sigma_xy -= R_xy[offset_sls];
+    *sigma_xz -= R_xz[offset_sls];
+    *sigma_yz -= R_yz[offset_sls];
+  }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// updates R_memory
+
+__device__ void compute_element_cm_att_memory(int tx,int working_element,
+                                              realw* d_muvstore,
+                                              realw* factor_common,
+                                              realw* alphaval,realw* betaval,realw* gammaval,
+                                              realw* R_xx,realw* R_yy,realw* R_xy,realw* R_xz,realw* R_yz,
+                                              realw* epsilondev_xx,realw* epsilondev_yy,realw* epsilondev_xy,
+                                              realw* epsilondev_xz,realw* epsilondev_yz,
+                                              reald epsilondev_xx_loc,reald epsilondev_yy_loc,reald epsilondev_xy_loc,
+                                              reald epsilondev_xz_loc,reald epsilondev_yz_loc,
+                                              int ANISOTROPY,
+                                              realw* d_c44store
+                                              ){
+
+  // marches the attenuation memory variables R_xx,...,R_yz of every
+  // standard linear solid one step forward in time with a Runge-Kutta scheme
+
+  // entry for this GLL point in the padded modulus arrays and in the strain arrays
+  int offset_padded = tx + NGLL3_PADDED * working_element;
+  int ijk_ispec = tx + NGLL3 * working_element;
+
+  // shear modulus used in the common factor (only Q_mu attenuation):
+  // c44store for anisotropic elements, mustore otherwise
+  reald shear_mod;
+  if( ANISOTROPY ){
+    shear_mod = d_c44store[offset_padded];
+  }else{
+    shear_mod = d_muvstore[offset_padded];
+  }
+
+  for(int i_sls = 0; i_sls < N_SLS; i_sls++){
+    // index for R_xx,... and factor_common ordered as (i_sls,i,j,k,ispec)
+    // (the local CPU version instead uses (i,j,k,ispec,i_sls):
+    //  offset_sls = tx + NGLL3*(working_element + NSPEC*i_sls) )
+    int offset_sls = i_sls + N_SLS*(tx + NGLL3*working_element);
+
+    // either mustore(i,j,k,ispec) * factor_common(i_sls,i,j,k,ispec)
+    // or factor_common(i_sls,:,:,:,ispec) * c44store(:,:,:,ispec)
+    reald factor_loc = shear_mod * factor_common[offset_sls];
+
+    // Runge-Kutta coefficients for this solid, indexed by (i_sls)
+    reald a_loc = alphaval[i_sls];
+    reald b_loc = betaval[i_sls];
+    reald g_loc = gammaval[i_sls];
+
+    // term in xx: previous strain at (i,j,k,ispec) and current local strain at (i,j,k)
+    reald sn   = factor_loc * epsilondev_xx[ijk_ispec];
+    reald snp1 = factor_loc * epsilondev_xx_loc;
+    R_xx[offset_sls] = a_loc * R_xx[offset_sls] + b_loc * sn + g_loc * snp1;
+
+    // term in yy (the zz term is never computed since the trace is zero)
+    sn   = factor_loc * epsilondev_yy[ijk_ispec];
+    snp1 = factor_loc * epsilondev_yy_loc;
+    R_yy[offset_sls] = a_loc * R_yy[offset_sls] + b_loc * sn + g_loc * snp1;
+
+    // term in xy
+    sn   = factor_loc * epsilondev_xy[ijk_ispec];
+    snp1 = factor_loc * epsilondev_xy_loc;
+    R_xy[offset_sls] = a_loc * R_xy[offset_sls] + b_loc * sn + g_loc * snp1;
+
+    // term in xz
+    sn   = factor_loc * epsilondev_xz[ijk_ispec];
+    snp1 = factor_loc * epsilondev_xz_loc;
+    R_xz[offset_sls] = a_loc * R_xz[offset_sls] + b_loc * sn + g_loc * snp1;
+
+    // term in yz
+    sn   = factor_loc * epsilondev_yz[ijk_ispec];
+    snp1 = factor_loc * epsilondev_yz_loc;
+    R_yz[offset_sls] = a_loc * R_yz[offset_sls] + b_loc * sn + g_loc * snp1;
+  }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// pre-computes gravity term
+
+__device__ void compute_element_cm_gravity(int tx,int working_element,
+ int* d_ibool,
+ realw* d_xstore,realw* d_ystore,realw* d_zstore,
+ realw* d_minus_gravity_table,
+ realw* d_minus_deriv_gravity_table,
+ realw* d_density_table,
+ realw* wgll_cube,
+ reald jacobianl,
+ reald* s_dummyx_loc,
+ reald* s_dummyy_loc,
+ reald* s_dummyz_loc,
+ reald* sigma_xx,
+ reald* sigma_yy,
+ reald* sigma_zz,
+ reald* sigma_xy,
+ reald* sigma_yx,
+ reald* sigma_xz,
+ reald* sigma_zx,
+ reald* sigma_yz,
+ reald* sigma_zy,
+ reald* rho_s_H1,
+ reald* rho_s_H2,
+ reald* rho_s_H3){
+
+ // adds gravity contributions to the stress tensor (which becomes
+ // non-symmetric: separate yx/zx/zy components are updated) and
+ // precomputes the rho*(s.H) force vector rho_s_H1/2/3 for this GLL point
+
+ reald radius,theta,phi;
+ reald cos_theta,sin_theta,cos_phi,sin_phi;
+ reald minus_g,minus_dg;
+ reald rho;
+ reald gxl,gyl,gzl;
+ reald minus_g_over_radius,minus_dg_plus_g_over_radius;
+ reald cos_theta_sq,sin_theta_sq,cos_phi_sq,sin_phi_sq;
+ reald Hxxl,Hyyl,Hzzl,Hxyl,Hxzl,Hyzl;
+ reald sx_l,sy_l,sz_l;
+ reald factor;
+
+ // R_EARTH_KM is the radius of the bottom of the oceans (radius of Earth in km)
+ const reald R_EARTH_KM = 6371.0f;
+ // uncomment line below for PREM with oceans
+ //const reald R_EARTH_KM = 6368.0f;
+
+ // compute non-symmetric terms for gravity
+
+ // use mesh coordinates to get theta and phi
+ // x y z contain r theta phi
+ int iglob = d_ibool[working_element*NGLL3 + tx]-1;
+
+ radius = d_xstore[iglob];
+ theta = d_ystore[iglob];
+ phi = d_zstore[iglob];
+
+ cos_theta = cos(theta);
+ sin_theta = sin(theta);
+ cos_phi = cos(phi);
+ sin_phi = sin(phi);
+
+ // for efficiency replace with lookup table every 100 m in radial direction
+ // note: radius in crust mantle should never be zero,
+ // and arrays in C start from 0, thus we need to subtract -1
+ // (radius is non-dimensionalized; * R_EARTH_KM * 10 converts it to units of 100 m)
+ int int_radius = rint(radius * R_EARTH_KM * 10.0f ) - 1;
+
+ // get g, rho and dg/dr=dg
+ // spherical components of the gravitational acceleration
+ // for efficiency replace with lookup table every 100 m in radial direction
+ minus_g = d_minus_gravity_table[int_radius];
+ minus_dg = d_minus_deriv_gravity_table[int_radius];
+ rho = d_density_table[int_radius];
+
+ // Cartesian components of the gravitational acceleration
+ gxl = minus_g*sin_theta*cos_phi;
+ gyl = minus_g*sin_theta*sin_phi;
+ gzl = minus_g*cos_theta;
+
+ // Cartesian components of gradient of gravitational acceleration
+ // obtained from spherical components
+
+ minus_g_over_radius = minus_g / radius;
+ minus_dg_plus_g_over_radius = minus_dg - minus_g_over_radius;
+
+ cos_theta_sq = cos_theta*cos_theta;
+ sin_theta_sq = sin_theta*sin_theta;
+ cos_phi_sq = cos_phi*cos_phi;
+ sin_phi_sq = sin_phi*sin_phi;
+
+ // H tensor: Cartesian gradient of g (symmetric, 6 independent components)
+ Hxxl = minus_g_over_radius*(cos_phi_sq*cos_theta_sq + sin_phi_sq) + cos_phi_sq*minus_dg*sin_theta_sq;
+ Hyyl = minus_g_over_radius*(cos_phi_sq + cos_theta_sq*sin_phi_sq) + minus_dg*sin_phi_sq*sin_theta_sq;
+ Hzzl = cos_theta_sq*minus_dg + minus_g_over_radius*sin_theta_sq;
+ Hxyl = cos_phi*minus_dg_plus_g_over_radius*sin_phi*sin_theta_sq;
+ Hxzl = cos_phi*cos_theta*minus_dg_plus_g_over_radius*sin_theta;
+ Hyzl = cos_theta*minus_dg_plus_g_over_radius*sin_phi*sin_theta;
+
+ // get displacement and multiply by density to compute G tensor
+ // (s_dummy*_loc hold the element's displacement components in shared memory)
+ sx_l = rho * s_dummyx_loc[tx];
+ sy_l = rho * s_dummyy_loc[tx];
+ sz_l = rho * s_dummyz_loc[tx];
+
+ // compute G tensor from s . g and add to sigma (not symmetric)
+ *sigma_xx = *sigma_xx + sy_l*gyl + sz_l*gzl;
+ *sigma_yy = *sigma_yy + sx_l*gxl + sz_l*gzl;
+ *sigma_zz = *sigma_zz + sx_l*gxl + sy_l*gyl;
+
+ *sigma_xy = *sigma_xy - sx_l * gyl;
+ *sigma_yx = *sigma_yx - sy_l * gxl;
+
+ *sigma_xz = *sigma_xz - sx_l * gzl;
+ *sigma_zx = *sigma_zx - sz_l * gxl;
+
+ *sigma_yz = *sigma_yz - sy_l * gzl;
+ *sigma_zy = *sigma_zy - sz_l * gyl;
+
+ // precompute vector: rho * (s . H), weighted by the jacobian and the GLL cube weight
+ factor = jacobianl * wgll_cube[tx];
+ *rho_s_H1 = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl);
+ *rho_s_H2 = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl);
+ *rho_s_H3 = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl);
+
+ return;
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// computes stresses for anisotropic element
+
+__device__ void compute_element_cm_aniso(int offset,
+ realw* d_c11store,realw* d_c12store,realw* d_c13store,
+ realw* d_c14store,realw* d_c15store,realw* d_c16store,
+ realw* d_c22store,realw* d_c23store,realw* d_c24store,
+ realw* d_c25store,realw* d_c26store,realw* d_c33store,
+ realw* d_c34store,realw* d_c35store,realw* d_c36store,
+ realw* d_c44store,realw* d_c45store,realw* d_c46store,
+ realw* d_c55store,realw* d_c56store,realw* d_c66store,
+ int ATTENUATION,
+ reald minus_sum_beta,
+ reald duxdxl,reald duxdyl,reald duxdzl,
+ reald duydxl,reald duydyl,reald duydzl,
+ reald duzdxl,reald duzdyl,reald duzdzl,
+ reald duxdxl_plus_duydyl,reald duxdxl_plus_duzdzl,reald duydyl_plus_duzdzl,
+ reald duxdyl_plus_duydxl,reald duzdxl_plus_duxdzl,reald duzdyl_plus_duydzl,
+ reald* sigma_xx,reald* sigma_yy,reald* sigma_zz,
+ reald* sigma_xy,reald* sigma_xz,reald* sigma_yz
+ ){
+
+ // computes the six stress components from the displacement gradients for a
+ // fully anisotropic element, using the 21 Voigt-notation stiffness
+ // coefficients c11..c66 stored per GLL point
+
+ reald c11,c12,c13,c14,c15,c16,c22,c23,c24,c25,c26,c33,c34,c35,c36,c44,c45,c46,c55,c56,c66;
+ reald mul;
+
+ c11 = d_c11store[offset];
+ c12 = d_c12store[offset];
+ c13 = d_c13store[offset];
+ c14 = d_c14store[offset];
+ c15 = d_c15store[offset];
+ c16 = d_c16store[offset];
+ c22 = d_c22store[offset];
+ c23 = d_c23store[offset];
+ c24 = d_c24store[offset];
+ c25 = d_c25store[offset];
+ c26 = d_c26store[offset];
+ c33 = d_c33store[offset];
+ c34 = d_c34store[offset];
+ c35 = d_c35store[offset];
+ c36 = d_c36store[offset];
+ c44 = d_c44store[offset];
+ c45 = d_c45store[offset];
+ c46 = d_c46store[offset];
+ c55 = d_c55store[offset];
+ c56 = d_c56store[offset];
+ c66 = d_c66store[offset];
+
+ // use unrelaxed parameters if attenuation
+ // shift of the isotropic part of the tensor by the shear modulus c44:
+ // +4/3*mu on the diagonal stiffnesses, -2/3*mu off-diagonal, +mu on shears
+ if( ATTENUATION){
+ mul = c44;
+ c11 = c11 + 1.33333333333333333333f * minus_sum_beta * mul;
+ c12 = c12 - 0.66666666666666666666f * minus_sum_beta * mul;
+ c13 = c13 - 0.66666666666666666666f * minus_sum_beta * mul;
+ c22 = c22 + 1.33333333333333333333f * minus_sum_beta * mul;
+ c23 = c23 - 0.66666666666666666666f * minus_sum_beta * mul;
+ c33 = c33 + 1.33333333333333333333f * minus_sum_beta * mul;
+ c44 = c44 + minus_sum_beta * mul;
+ c55 = c55 + minus_sum_beta * mul;
+ c66 = c66 + minus_sum_beta * mul;
+ }
+
+ // Hooke's law sigma = C : epsilon with the full 21-coefficient tensor
+ *sigma_xx = c11*duxdxl + c16*duxdyl_plus_duydxl + c12*duydyl +
+ c15*duzdxl_plus_duxdzl + c14*duzdyl_plus_duydzl + c13*duzdzl;
+ *sigma_yy = c12*duxdxl + c26*duxdyl_plus_duydxl + c22*duydyl +
+ c25*duzdxl_plus_duxdzl + c24*duzdyl_plus_duydzl + c23*duzdzl;
+ *sigma_zz = c13*duxdxl + c36*duxdyl_plus_duydxl + c23*duydyl +
+ c35*duzdxl_plus_duxdzl + c34*duzdyl_plus_duydzl + c33*duzdzl;
+ *sigma_xy = c16*duxdxl + c66*duxdyl_plus_duydxl + c26*duydyl +
+ c56*duzdxl_plus_duxdzl + c46*duzdyl_plus_duydzl + c36*duzdzl;
+ *sigma_xz = c15*duxdxl + c56*duxdyl_plus_duydxl + c25*duydyl +
+ c55*duzdxl_plus_duxdzl + c45*duzdyl_plus_duydzl + c35*duzdzl;
+ *sigma_yz = c14*duxdxl + c46*duxdyl_plus_duydxl + c24*duydyl +
+ c45*duzdxl_plus_duxdzl + c44*duzdyl_plus_duydzl + c34*duzdzl;
+ return;
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// computes stresses for isotropic element
+
+__device__ void compute_element_cm_iso(int offset,
+                                       realw* d_kappavstore,realw* d_muvstore,
+                                       int ATTENUATION,
+                                       reald one_minus_sum_beta_use,
+                                       reald duxdxl,reald duydyl,reald duzdzl,
+                                       reald duxdxl_plus_duydyl,reald duxdxl_plus_duzdzl,reald duydyl_plus_duzdzl,
+                                       reald duxdyl_plus_duydxl,reald duzdxl_plus_duxdzl,reald duzdyl_plus_duydzl,
+                                       reald* sigma_xx,reald* sigma_yy,reald* sigma_zz,
+                                       reald* sigma_xy,reald* sigma_xz,reald* sigma_yz){
+
+  // elastic isotropic rheology: bulk and shear moduli at this GLL point
+  reald kappal = d_kappavstore[offset];
+  reald mul = d_muvstore[offset];
+
+  // shifts the shear modulus to its unrelaxed value when attenuation is on
+  if( ATTENUATION ){
+    mul = mul * one_minus_sum_beta_use;
+  }
+
+  // Lame parameters: lambda + 2*mu = kappa + (4/3)*mu
+  reald lambdalplus2mul = kappal + 1.33333333333333333333f * mul;
+  reald lambdal = lambdalplus2mul - 2.0f * mul;
+
+  // normal components of the stress tensor
+  *sigma_xx = lambdalplus2mul*duxdxl + lambdal*duydyl_plus_duzdzl;
+  *sigma_yy = lambdalplus2mul*duydyl + lambdal*duxdxl_plus_duzdzl;
+  *sigma_zz = lambdalplus2mul*duzdzl + lambdal*duxdxl_plus_duydyl;
+
+  // shear components of the stress tensor
+  *sigma_xy = mul*duxdyl_plus_duydxl;
+  *sigma_xz = mul*duzdxl_plus_duxdzl;
+  *sigma_yz = mul*duzdyl_plus_duydzl;
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// computes stresses for transversely isotropic element
+
+__device__ void compute_element_cm_tiso(int offset,
+ realw* d_kappavstore,realw* d_muvstore,
+ realw* d_kappahstore,realw* d_muhstore,realw* d_eta_anisostore,
+ int ATTENUATION,
+ reald one_minus_sum_beta_use,
+ reald duxdxl,reald duxdyl,reald duxdzl,
+ reald duydxl,reald duydyl,reald duydzl,
+ reald duzdxl,reald duzdyl,reald duzdzl,
+ reald duxdxl_plus_duydyl,reald duxdxl_plus_duzdzl,reald duydyl_plus_duzdzl,
+ reald duxdyl_plus_duydxl,reald duzdxl_plus_duxdzl,reald duzdyl_plus_duydzl,
+ int iglob,int NGLOB,
+ realw* d_ystore, realw* d_zstore,
+ reald* sigma_xx,reald* sigma_yy,reald* sigma_zz,
+ reald* sigma_xy,reald* sigma_xz,reald* sigma_yz){
+
+ // computes the stress tensor for a transversely isotropic element:
+ // builds the 21 Cartesian stiffness coefficients from the Love parameters
+ // (A,C,L,N,F) and the local radial direction (theta,phi), then applies
+ // the general anisotropic Hooke's law
+
+ reald kappavl,muvl,kappahl,muhl;
+ reald rhovpvsq,rhovphsq,rhovsvsq,rhovshsq,eta_aniso;
+ reald costheta,sintheta,cosphi,sinphi;
+ reald costhetasq,sinthetasq,cosphisq,sinphisq,costhetafour,sinthetafour,cosphifour,sinphifour;
+ reald costwotheta,sintwotheta,costwophi,sintwophi,cosfourtheta,cosfourphi;
+ reald costwothetasq,costwophisq,sintwophisq;
+ reald etaminone,twoetaminone;
+ reald two_eta_aniso,four_eta_aniso,six_eta_aniso;
+ reald two_rhovsvsq,two_rhovshsq; // two_rhovpvsq,two_rhovphsq
+ reald four_rhovsvsq,four_rhovshsq; // four_rhovpvsq,four_rhovphsq
+ reald c11,c12,c13,c14,c15,c16,c22,c23,c24,c25,c26,c33,c34,c35,c36,c44,c45,c46,c55,c56,c66;
+
+ // cosine and sine function in CUDA only supported for float
+ reald theta,phi;
+
+ // use Kappa and mu from transversely isotropic model
+ kappavl = d_kappavstore[offset];
+ muvl = d_muvstore[offset];
+
+ kappahl = d_kappahstore[offset];
+ muhl = d_muhstore[offset];
+
+ // use unrelaxed parameters if attenuation
+ // eta does not need to be shifted since it is a ratio
+ if( ATTENUATION ){
+ muvl = muvl * one_minus_sum_beta_use;
+ muhl = muhl * one_minus_sum_beta_use;
+ }
+
+ rhovpvsq = kappavl + 1.33333333333333333333f * muvl ; //!!! that is C
+ rhovphsq = kappahl + 1.33333333333333333333f * muhl ; //!!! that is A
+
+ rhovsvsq = muvl; // !!! that is L
+ rhovshsq = muhl; //!!! that is N
+
+ eta_aniso = d_eta_anisostore[offset]; // !!! that is F / (A - 2 L)
+
+ // use mesh coordinates to get theta and phi
+ //ystore and zstore contain theta and phi
+ theta = d_ystore[iglob];
+ phi = d_zstore[iglob];
+
+ // branches on the actual precision of reald so that the single-precision
+ // intrinsics are used when possible (resolved at compile time)
+ if( sizeof( theta ) == sizeof( float ) ){
+ // float operations
+
+ // daniel: TODO - sincos function
+ // or: sincosf(theta, &sintheta, &costheta);
+ // or with loss of accuracy: __sincosf(theta, &sintheta, &costheta);
+ // or compile with: -use_fast_math
+
+ costheta = cosf(theta);
+ sintheta = sinf(theta);
+
+ cosphi = cosf(phi);
+ sinphi = sinf(phi);
+
+ costwotheta = cosf(2.0f * theta);
+ sintwotheta = sinf(2.0f * theta);
+ costwophi = cosf(2.0f * phi);
+ sintwophi = sinf(2.0f * phi);
+ cosfourtheta = cosf(4.0f * theta);
+ cosfourphi = cosf(4.0f * phi);
+ }else{
+ // double operations
+ costheta = cos(theta);
+ sintheta = sin(theta);
+
+ cosphi = cos(phi);
+ sinphi = sin(phi);
+
+ costwotheta = cos(2.0f * theta);
+ sintwotheta = sin(2.0f * theta);
+ costwophi = cos(2.0f * phi);
+ sintwophi = sin(2.0f * phi);
+
+ cosfourtheta = cos(4.0f * theta);
+ cosfourphi = cos(4.0f * phi);
+ }
+
+ costhetasq = costheta * costheta;
+ sinthetasq = sintheta * sintheta;
+ cosphisq = cosphi * cosphi;
+ sinphisq = sinphi * sinphi;
+
+ costhetafour = costhetasq * costhetasq;
+ sinthetafour = sinthetasq * sinthetasq;
+ cosphifour = cosphisq * cosphisq;
+ sinphifour = sinphisq * sinphisq;
+
+ costwothetasq = costwotheta * costwotheta;
+
+ costwophisq = costwophi * costwophi;
+ sintwophisq = sintwophi * sintwophi;
+
+ etaminone = eta_aniso - 1.0f;
+ twoetaminone = 2.0f * eta_aniso - 1.0f;
+
+ // precompute some products to reduce the CPU time
+
+ two_eta_aniso = 2.0f * eta_aniso;
+ four_eta_aniso = 4.0f * eta_aniso;
+ six_eta_aniso = 6.0f * eta_aniso;
+
+ //two_rhovpvsq = 2.0f * rhovpvsq;
+ //two_rhovphsq = 2.0f * rhovphsq;
+ two_rhovsvsq = 2.0f * rhovsvsq;
+ two_rhovshsq = 2.0f * rhovshsq;
+
+ //four_rhovpvsq = 4.0f * rhovpvsq;
+ //four_rhovphsq = 4.0f * rhovphsq;
+ four_rhovsvsq = 4.0f * rhovsvsq;
+ four_rhovshsq = 4.0f * rhovshsq;
+
+ // the 21 anisotropic coefficients computed using Mathematica
+ // NOTE(review): these appear to rotate the radially-symmetric TI tensor into
+ // Cartesian axes via theta/phi; do not reorder or reassociate the floating-point
+ // expressions below, results are sensitive to the evaluation order
+
+ c11 = rhovphsq*sinphifour + 2.0f*cosphisq*sinphisq*
+ (rhovphsq*costhetasq + (eta_aniso*rhovphsq + two_rhovsvsq - two_eta_aniso*rhovsvsq)*
+ sinthetasq) + cosphifour*
+ (rhovphsq*costhetafour + 2.0f*(eta_aniso*rhovphsq + two_rhovsvsq - two_eta_aniso*rhovsvsq)*
+ costhetasq*sinthetasq + rhovpvsq*sinthetafour);
+
+ c12 = ((rhovphsq - two_rhovshsq)*(3.0f + cosfourphi)*costhetasq)*0.25f -
+ four_rhovshsq*cosphisq*costhetasq*sinphisq +
+ (rhovphsq*(11.0f + 4.0f*costwotheta + cosfourtheta)*sintwophisq)*0.03125f +
+ eta_aniso*(rhovphsq - two_rhovsvsq)*(cosphifour +
+ 2.0f*cosphisq*costhetasq*sinphisq + sinphifour)*sinthetasq +
+ rhovpvsq*cosphisq*sinphisq*sinthetafour -
+ rhovsvsq*sintwophisq*sinthetafour;
+
+ c13 = (cosphisq*(rhovphsq + six_eta_aniso*rhovphsq + rhovpvsq - four_rhovsvsq -
+ 12.0f*eta_aniso*rhovsvsq + (twoetaminone*rhovphsq - rhovpvsq + four_rhovsvsq -
+ four_eta_aniso*rhovsvsq)*cosfourtheta))*0.125f +
+ sinphisq*(eta_aniso*(rhovphsq - two_rhovsvsq)*costhetasq +
+ (rhovphsq - two_rhovshsq)*sinthetasq);
+
+ c14 = costheta*sinphi*((cosphisq*
+ (-rhovphsq + rhovpvsq + four_rhovshsq - four_rhovsvsq +
+ (-rhovphsq + two_eta_aniso*rhovphsq - rhovpvsq + four_rhovsvsq -
+ four_eta_aniso*rhovsvsq)*costwotheta))*0.5f +
+ (etaminone*rhovphsq + 2.0f*(rhovshsq - eta_aniso*rhovsvsq))*sinphisq)* sintheta;
+
+ c15 = cosphi*costheta*((cosphisq* (-rhovphsq + rhovpvsq +
+ (twoetaminone*rhovphsq - rhovpvsq + four_rhovsvsq - four_eta_aniso*rhovsvsq)*
+ costwotheta))*0.5f + etaminone*(rhovphsq - two_rhovsvsq)*sinphisq)*sintheta;
+
+ c16 = (cosphi*sinphi*(cosphisq* (-rhovphsq + rhovpvsq +
+ (-rhovphsq + two_eta_aniso*rhovphsq - rhovpvsq + four_rhovsvsq -
+ four_eta_aniso*rhovsvsq)*costwotheta) +
+ 2.0f*etaminone*(rhovphsq - two_rhovsvsq)*sinphisq)*sinthetasq)*0.5f;
+
+ c22 = rhovphsq*cosphifour + 2.0f*cosphisq*sinphisq*
+ (rhovphsq*costhetasq + (eta_aniso*rhovphsq + two_rhovsvsq - two_eta_aniso*rhovsvsq)*
+ sinthetasq) + sinphifour*
+ (rhovphsq*costhetafour + 2.0f*(eta_aniso*rhovphsq + two_rhovsvsq - two_eta_aniso*rhovsvsq)*
+ costhetasq*sinthetasq + rhovpvsq*sinthetafour);
+
+ c23 = ((rhovphsq + six_eta_aniso*rhovphsq + rhovpvsq - four_rhovsvsq - 12.0f*eta_aniso*rhovsvsq +
+ (twoetaminone*rhovphsq - rhovpvsq + four_rhovsvsq - four_eta_aniso*rhovsvsq)*
+ cosfourtheta)*sinphisq)*0.125f +
+ cosphisq*(eta_aniso*(rhovphsq - two_rhovsvsq)*costhetasq +
+ (rhovphsq - two_rhovshsq)*sinthetasq);
+
+ c24 = costheta*sinphi*(etaminone*(rhovphsq - two_rhovsvsq)*cosphisq +
+ ((-rhovphsq + rhovpvsq + (twoetaminone*rhovphsq - rhovpvsq +
+ four_rhovsvsq - four_eta_aniso*rhovsvsq)*costwotheta)*sinphisq)*0.5f)*sintheta;
+
+ c25 = cosphi*costheta*((etaminone*rhovphsq + 2.0f*(rhovshsq - eta_aniso*rhovsvsq))*
+ cosphisq + ((-rhovphsq + rhovpvsq + four_rhovshsq - four_rhovsvsq +
+ (-rhovphsq + two_eta_aniso*rhovphsq - rhovpvsq + four_rhovsvsq -
+ four_eta_aniso*rhovsvsq)*costwotheta)*sinphisq)*0.5f)*sintheta;
+
+ c26 = (cosphi*sinphi*(2.0f*etaminone*(rhovphsq - two_rhovsvsq)*cosphisq +
+ (-rhovphsq + rhovpvsq + (-rhovphsq + two_eta_aniso*rhovphsq - rhovpvsq + four_rhovsvsq -
+ four_eta_aniso*rhovsvsq)*costwotheta)*sinphisq)*sinthetasq)*0.5f;
+
+ c33 = rhovpvsq*costhetafour + 2.0f*(eta_aniso*(rhovphsq - two_rhovsvsq) + two_rhovsvsq)*
+ costhetasq*sinthetasq + rhovphsq*sinthetafour;
+
+ c34 = -((rhovphsq - rhovpvsq + (twoetaminone*rhovphsq - rhovpvsq + four_rhovsvsq
+ - four_eta_aniso*rhovsvsq)*costwotheta)*sinphi*sintwotheta)*0.25f;
+
+ c35 = -(cosphi*(rhovphsq - rhovpvsq +
+ (twoetaminone*rhovphsq - rhovpvsq + four_rhovsvsq - four_eta_aniso*rhovsvsq)*
+ costwotheta)*sintwotheta)*0.25f;
+
+ c36 = -((rhovphsq - rhovpvsq - four_rhovshsq + four_rhovsvsq +
+ (twoetaminone*rhovphsq - rhovpvsq + four_rhovsvsq - four_eta_aniso*rhovsvsq)*
+ costwotheta)*sintwophi*sinthetasq)*0.25f;
+
+ c44 = cosphisq*(rhovsvsq*costhetasq + rhovshsq*sinthetasq) +
+ sinphisq*(rhovsvsq*costwothetasq +
+ (rhovphsq - two_eta_aniso*rhovphsq + rhovpvsq + four_eta_aniso*rhovsvsq)*costhetasq* sinthetasq);
+
+ c45 = ((rhovphsq - two_eta_aniso*rhovphsq + rhovpvsq - two_rhovshsq - two_rhovsvsq +
+ four_eta_aniso*rhovsvsq + (rhovphsq - two_eta_aniso*rhovphsq + rhovpvsq +
+ 4.0f*etaminone*rhovsvsq)*costwotheta)*sintwophi*sinthetasq)*0.25f;
+
+ c46 = -(cosphi*costheta*((rhovshsq - rhovsvsq)*cosphisq -
+ ((rhovphsq - two_eta_aniso*rhovphsq + rhovpvsq - two_rhovshsq - two_rhovsvsq +
+ four_eta_aniso*rhovsvsq + (-rhovphsq + two_eta_aniso*rhovphsq - rhovpvsq +
+ four_rhovsvsq - four_eta_aniso*rhovsvsq)*costwotheta)*sinphisq)*0.5f)* sintheta);
+
+ c55 = sinphisq*(rhovsvsq*costhetasq + rhovshsq*sinthetasq) +
+ cosphisq*(rhovsvsq*costwothetasq +
+ (rhovphsq - two_eta_aniso*rhovphsq + rhovpvsq + four_eta_aniso*rhovsvsq)*costhetasq* sinthetasq);
+
+ c56 = costheta*sinphi*((cosphisq*
+ (rhovphsq - two_eta_aniso*rhovphsq + rhovpvsq - two_rhovshsq - two_rhovsvsq +
+ four_eta_aniso*rhovsvsq + (-rhovphsq + two_eta_aniso*rhovphsq - rhovpvsq +
+ four_rhovsvsq - four_eta_aniso*rhovsvsq)*costwotheta))*0.5f +
+ (-rhovshsq + rhovsvsq)*sinphisq)*sintheta;
+
+ c66 = rhovshsq*costwophisq*costhetasq -
+ 2.0f*(rhovphsq - two_rhovshsq)*cosphisq*costhetasq*sinphisq +
+ (rhovphsq*(11.0f + 4.0f*costwotheta + cosfourtheta)*sintwophisq)*0.03125f -
+ (rhovsvsq*(-6.0f - 2.0f*cosfourphi + cos(4.0f*phi - 2.0f*theta) - 2.0f*costwotheta +
+ cos(2.0f*(2.0f*phi + theta)))*sinthetasq)*0.125f +
+ rhovpvsq*cosphisq*sinphisq*sinthetafour -
+ (eta_aniso*(rhovphsq - two_rhovsvsq)*sintwophisq*sinthetafour)*0.5f;
+
+ // general expression of stress tensor for full Cijkl with 21 coefficients
+
+ *sigma_xx = c11*duxdxl + c16*duxdyl_plus_duydxl + c12*duydyl +
+ c15*duzdxl_plus_duxdzl + c14*duzdyl_plus_duydzl + c13*duzdzl;
+
+ *sigma_yy = c12*duxdxl + c26*duxdyl_plus_duydxl + c22*duydyl +
+ c25*duzdxl_plus_duxdzl + c24*duzdyl_plus_duydzl + c23*duzdzl;
+
+ *sigma_zz = c13*duxdxl + c36*duxdyl_plus_duydxl + c23*duydyl +
+ c35*duzdxl_plus_duxdzl + c34*duzdyl_plus_duydzl + c33*duzdzl;
+
+ *sigma_xy = c16*duxdxl + c66*duxdyl_plus_duydxl + c26*duydyl +
+ c56*duzdxl_plus_duxdzl + c46*duzdyl_plus_duydzl + c36*duzdzl;
+
+ *sigma_xz = c15*duxdxl + c56*duxdyl_plus_duydxl + c25*duydyl +
+ c55*duzdxl_plus_duxdzl + c45*duzdyl_plus_duydzl + c35*duzdzl;
+
+ *sigma_yz = c14*duxdxl + c46*duxdyl_plus_duydxl + c24*duydyl +
+ c45*duzdxl_plus_duxdzl + c44*duzdyl_plus_duydzl + c34*duzdzl;
+
+ return;
+}
+
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// KERNEL 2
+//
+// for crust_mantle
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void Kernel_2_crust_mantle_impl(int nb_blocks_to_compute,
+ int NGLOB,
+ int* d_ibool,
+ int* d_ispec_is_tiso,
+ int* d_phase_ispec_inner,
+ int num_phase_ispec,
+ int d_iphase,
+ int use_mesh_coloring_gpu,
+ realw* d_displ,
+ realw* d_accel,
+ realw* d_xix, realw* d_xiy, realw* d_xiz,
+ realw* d_etax, realw* d_etay, realw* d_etaz,
+ realw* d_gammax, realw* d_gammay, realw* d_gammaz,
+ realw* d_kappavstore, realw* d_muvstore,
+ realw* d_kappahstore, realw* d_muhstore,
+ realw* d_eta_anisostore,
+ int COMPUTE_AND_STORE_STRAIN,
+ realw* epsilondev_xx,realw* epsilondev_yy,realw* epsilondev_xy,
+ realw* epsilondev_xz,realw* epsilondev_yz,
+ realw* epsilon_trace_over_3,
+ int SIMULATION_TYPE,
+ int ATTENUATION,
+ int USE_ATTENUATION_MIMIC,
+ realw* one_minus_sum_beta,realw* factor_common,
+ realw* R_xx, realw* R_yy, realw* R_xy, realw* R_xz, realw* R_yz,
+ realw* alphaval,realw* betaval,realw* gammaval,
+ int ANISOTROPY,
+ realw* d_c11store,realw* d_c12store,realw* d_c13store,
+ realw* d_c14store,realw* d_c15store,realw* d_c16store,
+ realw* d_c22store,realw* d_c23store,realw* d_c24store,
+ realw* d_c25store,realw* d_c26store,realw* d_c33store,
+ realw* d_c34store,realw* d_c35store,realw* d_c36store,
+ realw* d_c44store,realw* d_c45store,realw* d_c46store,
+ realw* d_c55store,realw* d_c56store,realw* d_c66store,
+ int GRAVITY,
+ realw* d_xstore,realw* d_ystore,realw* d_zstore,
+ realw* d_minus_gravity_table,
+ realw* d_minus_deriv_gravity_table,
+ realw* d_density_table,
+ realw* wgll_cube){
+
+ /* int bx = blockIdx.y*blockDim.x+blockIdx.x; //possible bug in original code*/
+ int bx = blockIdx.y*gridDim.x+blockIdx.x;
+ /* int bx = blockIdx.x; */
+ int tx = threadIdx.x;
+
+ //const int NGLLX = 5;
+ // const int NGLL2 = 25;
+ //const int NGLL3 = NGLL3;
+ const int NGLL3_ALIGN = NGLL3_PADDED;
+
+ int K = (tx/NGLL2);
+ int J = ((tx-K*NGLL2)/NGLLX);
+ int I = (tx-K*NGLL2-J*NGLLX);
+
+ int active,offset;
+ int iglob = 0;
+ int working_element;
+
+ reald tempx1l,tempx2l,tempx3l,tempy1l,tempy2l,tempy3l,tempz1l,tempz2l,tempz3l;
+ reald xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl;
+ reald duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl;
+ reald duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl;
+ reald duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl;
+
+ reald fac1,fac2,fac3;
+ reald minus_sum_beta,one_minus_sum_beta_use;
+
+ reald sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz;
+ reald epsilondev_xx_loc,epsilondev_yy_loc,epsilondev_xy_loc,epsilondev_xz_loc,epsilondev_yz_loc;
+ //reald c11,c12,c13,c14,c15,c16,c22,c23,c24,c25,c26,c33,c34,c35,c36,c44,c45,c46,c55,c56,c66;
+ reald sum_terms1,sum_terms2,sum_terms3;
+
+ // gravity variables
+ reald sigma_yx,sigma_zx,sigma_zy;
+ reald rho_s_H1,rho_s_H2,rho_s_H3;
+
+#ifndef MANUALLY_UNROLLED_LOOPS
+ int l;
+ realw hp1,hp2,hp3;
+#endif
+
+ __shared__ reald s_dummyx_loc[NGLL3];
+ __shared__ reald s_dummyy_loc[NGLL3];
+ __shared__ reald s_dummyz_loc[NGLL3];
+
+ __shared__ reald s_tempx1[NGLL3];
+ __shared__ reald s_tempx2[NGLL3];
+ __shared__ reald s_tempx3[NGLL3];
+ __shared__ reald s_tempy1[NGLL3];
+ __shared__ reald s_tempy2[NGLL3];
+ __shared__ reald s_tempy3[NGLL3];
+ __shared__ reald s_tempz1[NGLL3];
+ __shared__ reald s_tempz2[NGLL3];
+ __shared__ reald s_tempz3[NGLL3];
+
+// use only NGLL^3 = 125 active threads, plus 3 inactive/ghost threads,
+// because we used memory padding from NGLL^3 = 125 to 128 to get coalescent memory accesses
+ active = (tx < NGLL3 && bx < nb_blocks_to_compute) ? 1:0;
+
+// copy from global memory to shared memory
+// each thread writes one of the NGLL^3 = 125 data points
+ if (active) {
+
+#ifdef USE_MESH_COLORING_GPU
+ working_element = bx;
+#else
+ //mesh coloring
+ if( use_mesh_coloring_gpu ){
+ working_element = bx;
+ }else{
+ // iphase-1 and working_element-1 for Fortran->C array conventions
+ working_element = d_phase_ispec_inner[bx + num_phase_ispec*(d_iphase-1)]-1;
+ }
+#endif
+
+ // iglob = d_ibool[working_element*NGLL3_ALIGN + tx]-1;
+ iglob = d_ibool[working_element*NGLL3 + tx]-1;
+
+#ifdef USE_TEXTURES
+ s_dummyx_loc[tx] = tex1Dfetch(tex_displ, iglob);
+ s_dummyy_loc[tx] = tex1Dfetch(tex_displ, iglob + NGLOB);
+ s_dummyz_loc[tx] = tex1Dfetch(tex_displ, iglob + 2*NGLOB);
+#else
+ // changing iglob indexing to match fortran row changes fast style
+ s_dummyx_loc[tx] = d_displ[iglob*3];
+ s_dummyy_loc[tx] = d_displ[iglob*3 + 1];
+ s_dummyz_loc[tx] = d_displ[iglob*3 + 2];
+#endif
+ }
+
+// synchronize all the threads (one thread for each of the NGLL grid points of the
+// current spectral element) because we need the whole element to be ready in order
+// to be able to compute the matrix products along cut planes of the 3D element below
+ __syncthreads();
+
+#ifndef MAKE_KERNEL2_BECOME_STUPID_FOR_TESTS
+
+ if (active) {
+
+#ifndef MANUALLY_UNROLLED_LOOPS
+
+ tempx1l = 0.f;
+ tempx2l = 0.f;
+ tempx3l = 0.f;
+
+ tempy1l = 0.f;
+ tempy2l = 0.f;
+ tempy3l = 0.f;
+
+ tempz1l = 0.f;
+ tempz2l = 0.f;
+ tempz3l = 0.f;
+
+ for (l=0;l<NGLLX;l++) {
+ hp1 = d_hprime_xx[l*NGLLX+I];
+ offset = K*NGLL2+J*NGLLX+l;
+ tempx1l += s_dummyx_loc[offset]*hp1;
+ tempy1l += s_dummyy_loc[offset]*hp1;
+ tempz1l += s_dummyz_loc[offset]*hp1;
+
+ hp2 = d_hprime_xx[l*NGLLX+J];
+ offset = K*NGLL2+l*NGLLX+I;
+ tempx2l += s_dummyx_loc[offset]*hp2;
+ tempy2l += s_dummyy_loc[offset]*hp2;
+ tempz2l += s_dummyz_loc[offset]*hp2;
+
+ hp3 = d_hprime_xx[l*NGLLX+K];
+ offset = l*NGLL2+J*NGLLX+I;
+ tempx3l += s_dummyx_loc[offset]*hp3;
+ tempy3l += s_dummyy_loc[offset]*hp3;
+ tempz3l += s_dummyz_loc[offset]*hp3;
+
+ }
+#else
+
+ tempx1l = s_dummyx_loc[K*NGLL2+J*NGLLX]*d_hprime_xx[I]
+ + s_dummyx_loc[K*NGLL2+J*NGLLX+1]*d_hprime_xx[NGLLX+I]
+ + s_dummyx_loc[K*NGLL2+J*NGLLX+2]*d_hprime_xx[2*NGLLX+I]
+ + s_dummyx_loc[K*NGLL2+J*NGLLX+3]*d_hprime_xx[3*NGLLX+I]
+ + s_dummyx_loc[K*NGLL2+J*NGLLX+4]*d_hprime_xx[4*NGLLX+I];
+
+ tempy1l = s_dummyy_loc[K*NGLL2+J*NGLLX]*d_hprime_xx[I]
+ + s_dummyy_loc[K*NGLL2+J*NGLLX+1]*d_hprime_xx[NGLLX+I]
+ + s_dummyy_loc[K*NGLL2+J*NGLLX+2]*d_hprime_xx[2*NGLLX+I]
+ + s_dummyy_loc[K*NGLL2+J*NGLLX+3]*d_hprime_xx[3*NGLLX+I]
+ + s_dummyy_loc[K*NGLL2+J*NGLLX+4]*d_hprime_xx[4*NGLLX+I];
+
+ tempz1l = s_dummyz_loc[K*NGLL2+J*NGLLX]*d_hprime_xx[I]
+ + s_dummyz_loc[K*NGLL2+J*NGLLX+1]*d_hprime_xx[NGLLX+I]
+ + s_dummyz_loc[K*NGLL2+J*NGLLX+2]*d_hprime_xx[2*NGLLX+I]
+ + s_dummyz_loc[K*NGLL2+J*NGLLX+3]*d_hprime_xx[3*NGLLX+I]
+ + s_dummyz_loc[K*NGLL2+J*NGLLX+4]*d_hprime_xx[4*NGLLX+I];
+
+ tempx2l = s_dummyx_loc[K*NGLL2+I]*d_hprime_xx[J]
+ + s_dummyx_loc[K*NGLL2+NGLLX+I]*d_hprime_xx[NGLLX+J]
+ + s_dummyx_loc[K*NGLL2+2*NGLLX+I]*d_hprime_xx[2*NGLLX+J]
+ + s_dummyx_loc[K*NGLL2+3*NGLLX+I]*d_hprime_xx[3*NGLLX+J]
+ + s_dummyx_loc[K*NGLL2+4*NGLLX+I]*d_hprime_xx[4*NGLLX+J];
+
+ tempy2l = s_dummyy_loc[K*NGLL2+I]*d_hprime_xx[J]
+ + s_dummyy_loc[K*NGLL2+NGLLX+I]*d_hprime_xx[NGLLX+J]
+ + s_dummyy_loc[K*NGLL2+2*NGLLX+I]*d_hprime_xx[2*NGLLX+J]
+ + s_dummyy_loc[K*NGLL2+3*NGLLX+I]*d_hprime_xx[3*NGLLX+J]
+ + s_dummyy_loc[K*NGLL2+4*NGLLX+I]*d_hprime_xx[4*NGLLX+J];
+
+ tempz2l = s_dummyz_loc[K*NGLL2+I]*d_hprime_xx[J]
+ + s_dummyz_loc[K*NGLL2+NGLLX+I]*d_hprime_xx[NGLLX+J]
+ + s_dummyz_loc[K*NGLL2+2*NGLLX+I]*d_hprime_xx[2*NGLLX+J]
+ + s_dummyz_loc[K*NGLL2+3*NGLLX+I]*d_hprime_xx[3*NGLLX+J]
+ + s_dummyz_loc[K*NGLL2+4*NGLLX+I]*d_hprime_xx[4*NGLLX+J];
+
+ tempx3l = s_dummyx_loc[J*NGLLX+I]*d_hprime_xx[K]
+ + s_dummyx_loc[NGLL2+J*NGLLX+I]*d_hprime_xx[NGLLX+K]
+ + s_dummyx_loc[2*NGLL2+J*NGLLX+I]*d_hprime_xx[2*NGLLX+K]
+ + s_dummyx_loc[3*NGLL2+J*NGLLX+I]*d_hprime_xx[3*NGLLX+K]
+ + s_dummyx_loc[4*NGLL2+J*NGLLX+I]*d_hprime_xx[4*NGLLX+K];
+
+ tempy3l = s_dummyy_loc[J*NGLLX+I]*d_hprime_xx[K]
+ + s_dummyy_loc[NGLL2+J*NGLLX+I]*d_hprime_xx[NGLLX+K]
+ + s_dummyy_loc[2*NGLL2+J*NGLLX+I]*d_hprime_xx[2*NGLLX+K]
+ + s_dummyy_loc[3*NGLL2+J*NGLLX+I]*d_hprime_xx[3*NGLLX+K]
+ + s_dummyy_loc[4*NGLL2+J*NGLLX+I]*d_hprime_xx[4*NGLLX+K];
+
+ tempz3l = s_dummyz_loc[J*NGLLX+I]*d_hprime_xx[K]
+ + s_dummyz_loc[NGLL2+J*NGLLX+I]*d_hprime_xx[NGLLX+K]
+ + s_dummyz_loc[2*NGLL2+J*NGLLX+I]*d_hprime_xx[2*NGLLX+K]
+ + s_dummyz_loc[3*NGLL2+J*NGLLX+I]*d_hprime_xx[3*NGLLX+K]
+ + s_dummyz_loc[4*NGLL2+J*NGLLX+I]*d_hprime_xx[4*NGLLX+K];
+
+#endif
+
+ // compute derivatives of ux, uy and uz with respect to x, y and z
+ offset = working_element*NGLL3_ALIGN + tx;
+
+ xixl = d_xix[offset];
+ xiyl = d_xiy[offset];
+ xizl = d_xiz[offset];
+ etaxl = d_etax[offset];
+ etayl = d_etay[offset];
+ etazl = d_etaz[offset];
+ gammaxl = d_gammax[offset];
+ gammayl = d_gammay[offset];
+ gammazl = d_gammaz[offset];
+
+ duxdxl = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l;
+ duxdyl = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l;
+ duxdzl = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l;
+
+ duydxl = xixl*tempy1l + etaxl*tempy2l + gammaxl*tempy3l;
+ duydyl = xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l;
+ duydzl = xizl*tempy1l + etazl*tempy2l + gammazl*tempy3l;
+
+ duzdxl = xixl*tempz1l + etaxl*tempz2l + gammaxl*tempz3l;
+ duzdyl = xiyl*tempz1l + etayl*tempz2l + gammayl*tempz3l;
+ duzdzl = xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l;
+
+ // precompute some sums to save CPU time
+ duxdxl_plus_duydyl = duxdxl + duydyl;
+ duxdxl_plus_duzdzl = duxdxl + duzdzl;
+ duydyl_plus_duzdzl = duydyl + duzdzl;
+ duxdyl_plus_duydxl = duxdyl + duydxl;
+ duzdxl_plus_duxdzl = duzdxl + duxdzl;
+ duzdyl_plus_duydzl = duzdyl + duydzl;
+
+ // computes deviatoric strain attenuation and/or for kernel calculations
+ if(COMPUTE_AND_STORE_STRAIN) {
+ realw templ = 0.33333333333333333333f * (duxdxl + duydyl + duzdzl); // 1./3. = 0.33333
+
+ // local storage: stresses at this current time step
+ epsilondev_xx_loc = duxdxl - templ;
+ epsilondev_yy_loc = duydyl - templ;
+ epsilondev_xy_loc = 0.5f * duxdyl_plus_duydxl;
+ epsilondev_xz_loc = 0.5f * duzdxl_plus_duxdzl;
+ epsilondev_yz_loc = 0.5f * duzdyl_plus_duydzl;
+
+ if(SIMULATION_TYPE == 3) {
+ epsilon_trace_over_3[tx + working_element*NGLL3] = templ;
+ }
+ }
+
+ // attenuation
+ if(ATTENUATION){
+ // use unrelaxed parameters if attenuation
+ one_minus_sum_beta_use = one_minus_sum_beta[tx+working_element*NGLL3]; // (i,j,k,ispec)
+ minus_sum_beta = one_minus_sum_beta_use - 1.0f;
+ }
+
+ // computes stresses
+ if(ANISOTROPY){
+ // full anisotropic case, stress calculations
+ compute_element_cm_aniso(offset,
+ d_c11store,d_c12store,d_c13store,d_c14store,d_c15store,d_c16store,d_c22store,
+ d_c23store,d_c24store,d_c25store,d_c26store,d_c33store,d_c34store,d_c35store,
+ d_c36store,d_c44store,d_c45store,d_c46store,d_c55store,d_c56store,d_c66store,
+ ATTENUATION,
+ minus_sum_beta,
+ duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl,
+ duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl,
+ duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl,
+ &sigma_xx,&sigma_yy,&sigma_zz,
+ &sigma_xy,&sigma_xz,&sigma_yz);
+
+ }else{
+ if( ! d_ispec_is_tiso[working_element] ){
+ // isotropic case
+ compute_element_cm_iso(offset,
+ d_kappavstore,d_muvstore,
+ ATTENUATION,
+ one_minus_sum_beta_use,
+ duxdxl,duydyl,duzdzl,
+ duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl,
+ duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl,
+ &sigma_xx,&sigma_yy,&sigma_zz,
+ &sigma_xy,&sigma_xz,&sigma_yz);
+ }else{
+ // transverse isotropy
+ compute_element_cm_tiso(offset,
+ d_kappavstore,d_muvstore,
+ d_kappahstore,d_muhstore,d_eta_anisostore,
+ ATTENUATION,
+ one_minus_sum_beta_use,
+ duxdxl,duxdyl,duxdzl,
+ duydxl,duydyl,duydzl,
+ duzdxl,duzdyl,duzdzl,
+ duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl,
+ duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl,
+ iglob, NGLOB,
+ d_ystore,d_zstore,
+ &sigma_xx,&sigma_yy,&sigma_zz,
+ &sigma_xy,&sigma_xz,&sigma_yz);
+ }
+ } // ! end of test whether isotropic or anisotropic element
+
+
+ if(ATTENUATION && (! USE_ATTENUATION_MIMIC ) ){
+ // subtracts memory variables if attenuation
+ compute_element_cm_att_stress(tx,working_element,
+ R_xx,R_yy,R_xy,R_xz,R_yz,
+ &sigma_xx,&sigma_yy,&sigma_zz,&sigma_xy,&sigma_xz,&sigma_yz);
+ }
+
+ // define symmetric components (needed for non-symmetric dot product and sigma for gravity)
+ sigma_yx = sigma_xy;
+ sigma_zx = sigma_xz;
+ sigma_zy = sigma_yz;
+
+ // jacobian
+ jacobianl = 1.0f / (xixl*(etayl*gammazl-etazl*gammayl)
+ -xiyl*(etaxl*gammazl-etazl*gammaxl)
+ +xizl*(etaxl*gammayl-etayl*gammaxl));
+
+ if( GRAVITY ){
+ // computes non-symmetric terms for gravity
+ compute_element_cm_gravity(tx,working_element,
+ d_ibool,d_xstore,d_ystore,d_zstore,
+ d_minus_gravity_table,d_minus_deriv_gravity_table,d_density_table,
+ wgll_cube,jacobianl,
+ s_dummyx_loc,s_dummyy_loc,s_dummyz_loc,
+ &sigma_xx,&sigma_yy,&sigma_zz,&sigma_xy,&sigma_yx,
+ &sigma_xz,&sigma_zx,&sigma_yz,&sigma_zy,
+ &rho_s_H1,&rho_s_H2,&rho_s_H3);
+ }
+
+ // form dot product with test vector, non-symmetric form
+ s_tempx1[tx] = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl);
+ s_tempy1[tx] = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl);
+ s_tempz1[tx] = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl);
+
+ s_tempx2[tx] = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl);
+ s_tempy2[tx] = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl);
+ s_tempz2[tx] = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl);
+
+ s_tempx3[tx] = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl);
+ s_tempy3[tx] = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl);
+ s_tempz3[tx] = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl);
+
+ }
+
+// synchronize all the threads (one thread for each of the NGLL grid points of the
+// current spectral element) because we need the whole element to be ready in order
+// to be able to compute the matrix products along cut planes of the 3D element below
+ __syncthreads();
+
+ if (active) {
+
+#ifndef MANUALLY_UNROLLED_LOOPS
+
+ tempx1l = 0.f;
+ tempy1l = 0.f;
+ tempz1l = 0.f;
+
+ tempx2l = 0.f;
+ tempy2l = 0.f;
+ tempz2l = 0.f;
+
+ tempx3l = 0.f;
+ tempy3l = 0.f;
+ tempz3l = 0.f;
+
+ for (l=0;l<NGLLX;l++) {
+
+ fac1 = d_hprimewgll_xx[I*NGLLX+l];
+ offset = K*NGLL2+J*NGLLX+l;
+ tempx1l += s_tempx1[offset]*fac1;
+ tempy1l += s_tempy1[offset]*fac1;
+ tempz1l += s_tempz1[offset]*fac1;
+
+ fac2 = d_hprimewgll_yy[J*NGLLX+l];
+ offset = K*NGLL2+l*NGLLX+I;
+ tempx2l += s_tempx2[offset]*fac2;
+ tempy2l += s_tempy2[offset]*fac2;
+ tempz2l += s_tempz2[offset]*fac2;
+
+ fac3 = d_hprimewgll_zz[K*NGLLX+l];
+ offset = l*NGLL2+J*NGLLX+I;
+ tempx3l += s_tempx3[offset]*fac3;
+ tempy3l += s_tempy3[offset]*fac3;
+ tempz3l += s_tempz3[offset]*fac3;
+
+ }
+#else
+
+ tempx1l = s_tempx1[K*NGLL2+J*NGLLX]*d_hprimewgll_xx[I*NGLLX]
+ + s_tempx1[K*NGLL2+J*NGLLX+1]*d_hprimewgll_xx[I*NGLLX+1]
+ + s_tempx1[K*NGLL2+J*NGLLX+2]*d_hprimewgll_xx[I*NGLLX+2]
+ + s_tempx1[K*NGLL2+J*NGLLX+3]*d_hprimewgll_xx[I*NGLLX+3]
+ + s_tempx1[K*NGLL2+J*NGLLX+4]*d_hprimewgll_xx[I*NGLLX+4];
+
+ tempy1l = s_tempy1[K*NGLL2+J*NGLLX]*d_hprimewgll_xx[I*NGLLX]
+ + s_tempy1[K*NGLL2+J*NGLLX+1]*d_hprimewgll_xx[I*NGLLX+1]
+ + s_tempy1[K*NGLL2+J*NGLLX+2]*d_hprimewgll_xx[I*NGLLX+2]
+ + s_tempy1[K*NGLL2+J*NGLLX+3]*d_hprimewgll_xx[I*NGLLX+3]
+ + s_tempy1[K*NGLL2+J*NGLLX+4]*d_hprimewgll_xx[I*NGLLX+4];
+
+ tempz1l = s_tempz1[K*NGLL2+J*NGLLX]*d_hprimewgll_xx[I*NGLLX]
+ + s_tempz1[K*NGLL2+J*NGLLX+1]*d_hprimewgll_xx[I*NGLLX+1]
+ + s_tempz1[K*NGLL2+J*NGLLX+2]*d_hprimewgll_xx[I*NGLLX+2]
+ + s_tempz1[K*NGLL2+J*NGLLX+3]*d_hprimewgll_xx[I*NGLLX+3]
+ + s_tempz1[K*NGLL2+J*NGLLX+4]*d_hprimewgll_xx[I*NGLLX+4];
+
+ tempx2l = s_tempx2[K*NGLL2+I]*d_hprimewgll_yy[J*NGLLX]
+ + s_tempx2[K*NGLL2+NGLLX+I]*d_hprimewgll_yy[J*NGLLX+1]
+ + s_tempx2[K*NGLL2+2*NGLLX+I]*d_hprimewgll_yy[J*NGLLX+2]
+ + s_tempx2[K*NGLL2+3*NGLLX+I]*d_hprimewgll_yy[J*NGLLX+3]
+ + s_tempx2[K*NGLL2+4*NGLLX+I]*d_hprimewgll_yy[J*NGLLX+4];
+
+ tempy2l = s_tempy2[K*NGLL2+I]*d_hprimewgll_yy[J*NGLLX]
+ + s_tempy2[K*NGLL2+NGLLX+I]*d_hprimewgll_yy[J*NGLLX+1]
+ + s_tempy2[K*NGLL2+2*NGLLX+I]*d_hprimewgll_yy[J*NGLLX+2]
+ + s_tempy2[K*NGLL2+3*NGLLX+I]*d_hprimewgll_yy[J*NGLLX+3]
+ + s_tempy2[K*NGLL2+4*NGLLX+I]*d_hprimewgll_yy[J*NGLLX+4];
+
+ tempz2l = s_tempz2[K*NGLL2+I]*d_hprimewgll_yy[J*NGLLX]
+ + s_tempz2[K*NGLL2+NGLLX+I]*d_hprimewgll_yy[J*NGLLX+1]
+ + s_tempz2[K*NGLL2+2*NGLLX+I]*d_hprimewgll_yy[J*NGLLX+2]
+ + s_tempz2[K*NGLL2+3*NGLLX+I]*d_hprimewgll_yy[J*NGLLX+3]
+ + s_tempz2[K*NGLL2+4*NGLLX+I]*d_hprimewgll_yy[J*NGLLX+4];
+
+ tempx3l = s_tempx3[J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX]
+ + s_tempx3[NGLL2+J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX+1]
+ + s_tempx3[2*NGLL2+J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX+2]
+ + s_tempx3[3*NGLL2+J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX+3]
+ + s_tempx3[4*NGLL2+J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX+4];
+
+ tempy3l = s_tempy3[J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX]
+ + s_tempy3[NGLL2+J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX+1]
+ + s_tempy3[2*NGLL2+J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX+2]
+ + s_tempy3[3*NGLL2+J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX+3]
+ + s_tempy3[4*NGLL2+J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX+4];
+
+ tempz3l = s_tempz3[J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX]
+ + s_tempz3[NGLL2+J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX+1]
+ + s_tempz3[2*NGLL2+J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX+2]
+ + s_tempz3[3*NGLL2+J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX+3]
+ + s_tempz3[4*NGLL2+J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX+4];
+
+#endif
+
+ fac1 = d_wgllwgll_yz[K*NGLLX+J];
+ fac2 = d_wgllwgll_xz[K*NGLLX+I];
+ fac3 = d_wgllwgll_xy[J*NGLLX+I];
+
+ sum_terms1 = - (fac1*tempx1l + fac2*tempx2l + fac3*tempx3l);
+ sum_terms2 = - (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l);
+ sum_terms3 = - (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
+
+ // adds gravity term
+ if( GRAVITY ){
+ sum_terms1 += rho_s_H1;
+ sum_terms2 += rho_s_H2;
+ sum_terms3 += rho_s_H3;
+ }
+
+#ifdef USE_TEXTURES
+ d_accel[iglob] = tex1Dfetch(tex_accel, iglob) + sum_terms1;
+ d_accel[iglob + NGLOB] = tex1Dfetch(tex_accel, iglob + NGLOB) + sum_terms2;
+ d_accel[iglob + 2*NGLOB] = tex1Dfetch(tex_accel, iglob + 2*NGLOB) + sum_terms3;
+#else
+ /* OLD/To be implemented version that uses coloring to get around race condition. About 1.6x faster */
+
+
+#ifdef USE_MESH_COLORING_GPU
+ // no atomic operation needed, colors don't share global points between elements
+ d_accel[iglob*3] += sum_terms1;
+ d_accel[iglob*3 + 1] += sum_terms2;
+ d_accel[iglob*3 + 2] += sum_terms3;
+#else
+ //mesh coloring
+ if( use_mesh_coloring_gpu ){
+
+ // no atomic operation needed, colors don't share global points between elements
+ d_accel[iglob*3] += sum_terms1;
+ d_accel[iglob*3 + 1] += sum_terms2;
+ d_accel[iglob*3 + 2] += sum_terms3;
+
+ }else{
+
+ // for testing purposes only: w/out atomic updates
+ //d_accel[iglob*3] -= (0.00000001f*tempx1l + 0.00000001f*tempx2l + 0.00000001f*tempx3l);
+ //d_accel[iglob*3 + 1] -= (0.00000001f*tempy1l + 0.00000001f*tempy2l + 0.00000001f*tempy3l);
+ //d_accel[iglob*3 + 2] -= (0.00000001f*tempz1l + 0.00000001f*tempz2l + 0.00000001f*tempz3l);
+
+ atomicAdd(&d_accel[iglob*3], sum_terms1);
+ atomicAdd(&d_accel[iglob*3+1], sum_terms2);
+ atomicAdd(&d_accel[iglob*3+2], sum_terms3);
+
+ }
+#endif
+
+#endif
+
+ // update memory variables based upon the Runge-Kutta scheme
+ if( ATTENUATION && ( ! USE_ATTENUATION_MIMIC ) ){
+ compute_element_cm_att_memory(tx,working_element,
+ d_muvstore,
+ factor_common,alphaval,betaval,gammaval,
+ R_xx,R_yy,R_xy,R_xz,R_yz,
+ epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz,
+ epsilondev_xx_loc,epsilondev_yy_loc,epsilondev_xy_loc,epsilondev_xz_loc,epsilondev_yz_loc,
+ ANISOTROPY,d_c44store);
+ }
+
+ // save deviatoric strain for Runge-Kutta scheme
+ if( COMPUTE_AND_STORE_STRAIN ){
+ int ijk_ispec = tx + working_element*NGLL3;
+
+ // fortran: epsilondev_xx(:,:,:,ispec) = epsilondev_xx_loc(:,:,:)
+ epsilondev_xx[ijk_ispec] = epsilondev_xx_loc;
+ epsilondev_yy[ijk_ispec] = epsilondev_yy_loc;
+ epsilondev_xy[ijk_ispec] = epsilondev_xy_loc;
+ epsilondev_xz[ijk_ispec] = epsilondev_xz_loc;
+ epsilondev_yz[ijk_ispec] = epsilondev_yz_loc;
+ }
+
+ }
+
+#else // of #ifndef MAKE_KERNEL2_BECOME_STUPID_FOR_TESTS
+ d_accel[iglob] -= 0.00000001f;
+ d_accel[iglob + NGLOB] -= 0.00000001f;
+ d_accel[iglob + 2*NGLOB] -= 0.00000001f;
+#endif // of #ifndef MAKE_KERNEL2_BECOME_STUPID_FOR_TESTS
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Host-side launcher for the crust/mantle elastic-force kernel
+// (Kernel_2_crust_mantle_impl): one CUDA block per spectral element,
+// NGLL3_PADDED threads per block. For simulation_type == 3 a second launch
+// applies the same operator to the backward/reconstructed wavefield using
+// the d_b_* field, strain and attenuation-memory arrays.
+void Kernel_2_crust_mantle(int nb_blocks_to_compute,Mesh* mp,
+ int d_iphase,
+ int* d_ibool,
+ int* d_ispec_is_tiso,
+ realw* d_xix,realw* d_xiy,realw* d_xiz,
+ realw* d_etax,realw* d_etay,realw* d_etaz,
+ realw* d_gammax,realw* d_gammay,realw* d_gammaz,
+ realw* d_kappavstore,realw* d_muvstore,
+ realw* d_kappahstore,realw* d_muhstore,
+ realw* d_eta_anisostore,
+ realw* d_epsilondev_xx,
+ realw* d_epsilondev_yy,
+ realw* d_epsilondev_xy,
+ realw* d_epsilondev_xz,
+ realw* d_epsilondev_yz,
+ realw* d_epsilon_trace_over_3,
+ realw* d_one_minus_sum_beta,
+ realw* d_factor_common,
+ realw* d_R_xx,
+ realw* d_R_yy,
+ realw* d_R_xy,
+ realw* d_R_xz,
+ realw* d_R_yz,
+ realw* d_b_epsilondev_xx,
+ realw* d_b_epsilondev_yy,
+ realw* d_b_epsilondev_xy,
+ realw* d_b_epsilondev_xz,
+ realw* d_b_epsilondev_yz,
+ realw* d_b_epsilon_trace_over_3,
+ realw* d_b_R_xx,
+ realw* d_b_R_yy,
+ realw* d_b_R_xy,
+ realw* d_b_R_xz,
+ realw* d_b_R_yz,
+ realw* d_c11store,realw* d_c12store,realw* d_c13store,
+ realw* d_c14store,realw* d_c15store,realw* d_c16store,
+ realw* d_c22store,realw* d_c23store,realw* d_c24store,
+ realw* d_c25store,realw* d_c26store,realw* d_c33store,
+ realw* d_c34store,realw* d_c35store,realw* d_c36store,
+ realw* d_c44store,realw* d_c45store,realw* d_c46store,
+ realw* d_c55store,realw* d_c56store,realw* d_c66store
+ ){
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("before kernel Kernel_2_crust_mantle");
+#endif
+
+ /* if the grid can handle the number of blocks, we let it be 1D */
+ /* grid_2_x = nb_elem_color; */
+ /* nb_elem_color is just how many blocks we are computing now */
+
+ int num_blocks_x = nb_blocks_to_compute;
+ int num_blocks_y = 1;
+ // a single grid dimension is limited to 65535 blocks on this hardware
+ // generation, so fold the 1D block count into a 2D grid when it exceeds
+ // that limit (halve x, double y until x fits)
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ int blocksize = NGLL3_PADDED;
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ // Cuda timing
+ // cudaEvent_t start, stop;
+ // realw time;
+ // cudaEventCreate(&start);
+ // cudaEventCreate(&stop);
+ // cudaEventRecord( start, 0 );
+
+ // forward wavefield launch (displ/accel and forward strain/memory arrays)
+ Kernel_2_crust_mantle_impl<<<grid,threads>>>(nb_blocks_to_compute,
+ mp->NGLOB_CRUST_MANTLE,
+ d_ibool,
+ d_ispec_is_tiso,
+ mp->d_phase_ispec_inner_crust_mantle,
+ mp->num_phase_ispec_crust_mantle,
+ d_iphase,
+ mp->use_mesh_coloring_gpu,
+ mp->d_displ_crust_mantle,
+ mp->d_accel_crust_mantle,
+ d_xix, d_xiy, d_xiz,
+ d_etax, d_etay, d_etaz,
+ d_gammax, d_gammay, d_gammaz,
+ d_kappavstore, d_muvstore,
+ d_kappahstore, d_muhstore,
+ d_eta_anisostore,
+ mp->compute_and_store_strain,
+ d_epsilondev_xx,d_epsilondev_yy,d_epsilondev_xy,
+ d_epsilondev_xz,d_epsilondev_yz,
+ d_epsilon_trace_over_3,
+ mp->simulation_type,
+ mp->attenuation,
+ mp->use_attenuation_mimic,
+ d_one_minus_sum_beta,d_factor_common,
+ d_R_xx,d_R_yy,d_R_xy,d_R_xz,d_R_yz,
+ mp->d_alphaval,mp->d_betaval,mp->d_gammaval,
+ mp->anisotropic_3D_mantle,
+ d_c11store,d_c12store,d_c13store,
+ d_c14store,d_c15store,d_c16store,
+ d_c22store,d_c23store,d_c24store,
+ d_c25store,d_c26store,d_c33store,
+ d_c34store,d_c35store,d_c36store,
+ d_c44store,d_c45store,d_c46store,
+ d_c55store,d_c56store,d_c66store,
+ mp->gravity,
+ mp->d_xstore_crust_mantle,mp->d_ystore_crust_mantle,mp->d_zstore_crust_mantle,
+ mp->d_minus_gravity_table,
+ mp->d_minus_deriv_gravity_table,
+ mp->d_density_table,
+ mp->d_wgll_cube);
+
+
+ // adjoint/kernel simulations: second launch on the backward/reconstructed
+ // wavefield (b_displ/b_accel) with the corresponding b_* strain and
+ // attenuation-memory arrays and backward Runge-Kutta coefficients
+ if(mp->simulation_type == 3) {
+ Kernel_2_crust_mantle_impl<<< grid,threads>>>(nb_blocks_to_compute,
+ mp->NGLOB_CRUST_MANTLE,
+ d_ibool,
+ d_ispec_is_tiso,
+ mp->d_phase_ispec_inner_crust_mantle,
+ mp->num_phase_ispec_crust_mantle,
+ d_iphase,
+ mp->use_mesh_coloring_gpu,
+ mp->d_b_displ_crust_mantle,
+ mp->d_b_accel_crust_mantle,
+ d_xix, d_xiy, d_xiz,
+ d_etax, d_etay, d_etaz,
+ d_gammax, d_gammay, d_gammaz,
+ d_kappavstore, d_muvstore,
+ d_kappahstore, d_muhstore,
+ d_eta_anisostore,
+ mp->compute_and_store_strain,
+ d_b_epsilondev_xx,d_b_epsilondev_yy,d_b_epsilondev_xy,
+ d_b_epsilondev_xz,d_b_epsilondev_yz,
+ d_b_epsilon_trace_over_3,
+ mp->simulation_type,
+ mp->attenuation,
+ mp->use_attenuation_mimic,
+ d_one_minus_sum_beta,d_factor_common,
+ d_b_R_xx,d_b_R_yy,d_b_R_xy,d_b_R_xz,d_b_R_yz,
+ mp->d_b_alphaval,mp->d_b_betaval,mp->d_b_gammaval,
+ mp->anisotropic_3D_mantle,
+ d_c11store,d_c12store,d_c13store,
+ d_c14store,d_c15store,d_c16store,
+ d_c22store,d_c23store,d_c24store,
+ d_c25store,d_c26store,d_c33store,
+ d_c34store,d_c35store,d_c36store,
+ d_c44store,d_c45store,d_c46store,
+ d_c55store,d_c56store,d_c66store,
+ mp->gravity,
+ mp->d_xstore_crust_mantle,mp->d_ystore_crust_mantle,mp->d_zstore_crust_mantle,
+ mp->d_minus_gravity_table,
+ mp->d_minus_deriv_gravity_table,
+ mp->d_density_table,
+ mp->d_wgll_cube);
+ }
+
+ // cudaEventRecord( stop, 0 );
+ // cudaEventSynchronize( stop );
+ // cudaEventElapsedTime( &time, start, stop );
+ // cudaEventDestroy( start );
+ // cudaEventDestroy( stop );
+ // printf("Kernel2 Execution Time: %f ms\n",time);
+
+ /* cudaThreadSynchronize(); */
+ /* LOG("Kernel 2 finished"); */
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("Kernel_2_crust_mantle");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+// Entry point called from Fortran (FC_FUNC_ handles the name mangling):
+// computes the elastic forces for the crust/mantle region on the GPU for the
+// requested communication phase (*iphase == 1: outer elements that overlap
+// MPI communication, otherwise inner elements).
+// With mesh coloring enabled, elements are processed color by color with
+// running array offsets so no atomics are needed; otherwise a single launch
+// covers all elements and the kernel uses atomic updates.
+extern "C"
+void FC_FUNC_(compute_forces_crust_mantle_cuda,
+ COMPUTE_FORCES_CRUST_MANTLE_CUDA)(long* Mesh_pointer_f,
+ int* iphase) {
+
+ TRACE("compute_forces_crust_mantle_cuda");
+
+//daniel: debug time
+// printf("Running compute_forces\n");
+// double start_time = get_time();
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
+
+ int num_elements;
+
+ if( *iphase == 1 )
+ num_elements = mp->nspec_outer_crust_mantle;
+ else
+ num_elements = mp->nspec_inner_crust_mantle;
+
+ // checks if anything to do
+ if( num_elements == 0 ) return;
+
+ // mesh coloring
+ if( mp->use_mesh_coloring_gpu ){
+
+ // note: array offsets require sorted arrays, such that e.g. ibool starts with elastic elements
+ // and followed by acoustic ones.
+ // elastic elements also start with outer than inner element ordering
+
+ int nb_colors,nb_blocks_to_compute;
+ int istart;
+ // separate running offsets are kept because arrays differ in per-element
+ // size: NGLL3_PADDED (padded), NGLL3 (non-padded), NGLL3*N_SLS
+ // (factor_common), and 1 (per-ispec flags)
+ int color_offset,color_offset_nonpadded,color_offset_nonpadded_att2;
+ int color_offset_ispec;
+
+ // sets up color loop
+ if( *iphase == 1 ){
+ // outer elements
+ nb_colors = mp->num_colors_outer_crust_mantle;
+ istart = 0;
+
+ // array offsets
+ color_offset = 0;
+ color_offset_nonpadded = 0;
+ color_offset_nonpadded_att2 = 0;
+ color_offset_ispec = 0;
+ }else{
+ // inner elements (start after outer elements)
+ nb_colors = mp->num_colors_outer_crust_mantle + mp->num_colors_inner_crust_mantle;
+ istart = mp->num_colors_outer_crust_mantle;
+
+ // array offsets
+ color_offset = (mp->nspec_outer_crust_mantle) * NGLL3_PADDED;
+ color_offset_nonpadded = (mp->nspec_outer_crust_mantle) * NGLL3;
+ color_offset_nonpadded_att2 = (mp->nspec_outer_crust_mantle) * NGLL3 * N_SLS;
+ color_offset_ispec = mp->nspec_outer_crust_mantle;
+ }
+
+ // loops over colors
+ for(int icolor = istart; icolor < nb_colors; icolor++){
+
+ nb_blocks_to_compute = mp->h_num_elem_colors_crust_mantle[icolor];
+
+ // checks
+ //if( nb_blocks_to_compute <= 0 ){
+ // printf("error number of elastic color blocks: %d -- color = %d \n",nb_blocks_to_compute,icolor);
+ // exit(EXIT_FAILURE);
+ //}
+
+ // launch for this color only: device pointers are shifted by the
+ // running offsets so the kernel sees the color's elements at index 0
+ Kernel_2_crust_mantle(nb_blocks_to_compute,mp,
+ *iphase,
+ mp->d_ibool_crust_mantle + color_offset_nonpadded,
+ mp->d_ispec_is_tiso_crust_mantle + color_offset_ispec,
+ mp->d_xix_crust_mantle + color_offset,
+ mp->d_xiy_crust_mantle + color_offset,
+ mp->d_xiz_crust_mantle + color_offset,
+ mp->d_etax_crust_mantle + color_offset,
+ mp->d_etay_crust_mantle + color_offset,
+ mp->d_etaz_crust_mantle + color_offset,
+ mp->d_gammax_crust_mantle + color_offset,
+ mp->d_gammay_crust_mantle + color_offset,
+ mp->d_gammaz_crust_mantle + color_offset,
+ mp->d_kappavstore_crust_mantle + color_offset,
+ mp->d_muvstore_crust_mantle + color_offset,
+ mp->d_kappahstore_crust_mantle + color_offset,
+ mp->d_muhstore_crust_mantle + color_offset,
+ mp->d_eta_anisostore_crust_mantle + color_offset,
+ mp->d_epsilondev_xx_crust_mantle + color_offset_nonpadded,
+ mp->d_epsilondev_yy_crust_mantle + color_offset_nonpadded,
+ mp->d_epsilondev_xy_crust_mantle + color_offset_nonpadded,
+ mp->d_epsilondev_xz_crust_mantle + color_offset_nonpadded,
+ mp->d_epsilondev_yz_crust_mantle + color_offset_nonpadded,
+ mp->d_eps_trace_over_3_crust_mantle + color_offset_nonpadded,
+ mp->d_one_minus_sum_beta_crust_mantle + color_offset_nonpadded,
+ mp->d_factor_common_crust_mantle + color_offset_nonpadded_att2,
+ mp->d_R_xx_crust_mantle + color_offset_nonpadded,
+ mp->d_R_yy_crust_mantle + color_offset_nonpadded,
+ mp->d_R_xy_crust_mantle + color_offset_nonpadded,
+ mp->d_R_xz_crust_mantle + color_offset_nonpadded,
+ mp->d_R_yz_crust_mantle + color_offset_nonpadded,
+ mp->d_b_epsilondev_xx_crust_mantle + color_offset_nonpadded,
+ mp->d_b_epsilondev_yy_crust_mantle + color_offset_nonpadded,
+ mp->d_b_epsilondev_xy_crust_mantle + color_offset_nonpadded,
+ mp->d_b_epsilondev_xz_crust_mantle + color_offset_nonpadded,
+ mp->d_b_epsilondev_yz_crust_mantle + color_offset_nonpadded,
+ mp->d_b_eps_trace_over_3_crust_mantle + color_offset_nonpadded,
+ mp->d_b_R_xx_crust_mantle + color_offset_nonpadded,
+ mp->d_b_R_yy_crust_mantle + color_offset_nonpadded,
+ mp->d_b_R_xy_crust_mantle + color_offset_nonpadded,
+ mp->d_b_R_xz_crust_mantle + color_offset_nonpadded,
+ mp->d_b_R_yz_crust_mantle + color_offset_nonpadded,
+ mp->d_c11store_crust_mantle + color_offset,
+ mp->d_c12store_crust_mantle + color_offset,
+ mp->d_c13store_crust_mantle + color_offset,
+ mp->d_c14store_crust_mantle + color_offset,
+ mp->d_c15store_crust_mantle + color_offset,
+ mp->d_c16store_crust_mantle + color_offset,
+ mp->d_c22store_crust_mantle + color_offset,
+ mp->d_c23store_crust_mantle + color_offset,
+ mp->d_c24store_crust_mantle + color_offset,
+ mp->d_c25store_crust_mantle + color_offset,
+ mp->d_c26store_crust_mantle + color_offset,
+ mp->d_c33store_crust_mantle + color_offset,
+ mp->d_c34store_crust_mantle + color_offset,
+ mp->d_c35store_crust_mantle + color_offset,
+ mp->d_c36store_crust_mantle + color_offset,
+ mp->d_c44store_crust_mantle + color_offset,
+ mp->d_c45store_crust_mantle + color_offset,
+ mp->d_c46store_crust_mantle + color_offset,
+ mp->d_c55store_crust_mantle + color_offset,
+ mp->d_c56store_crust_mantle + color_offset,
+ mp->d_c66store_crust_mantle + color_offset
+ );
+
+ // for padded and aligned arrays
+ color_offset += nb_blocks_to_compute * NGLL3_PADDED;
+ // for no-aligned arrays
+ color_offset_nonpadded += nb_blocks_to_compute * NGLL3;
+ // for factor_common array
+ color_offset_nonpadded_att2 += nb_blocks_to_compute * NGLL3 * N_SLS;
+ // for array(ispec)
+ color_offset_ispec += nb_blocks_to_compute;
+ }
+
+ }else{
+
+ // no mesh coloring: uses atomic updates
+
+ Kernel_2_crust_mantle(num_elements,mp,
+ *iphase,
+ mp->d_ibool_crust_mantle,
+ mp->d_ispec_is_tiso_crust_mantle,
+ mp->d_xix_crust_mantle,mp->d_xiy_crust_mantle,mp->d_xiz_crust_mantle,
+ mp->d_etax_crust_mantle,mp->d_etay_crust_mantle,mp->d_etaz_crust_mantle,
+ mp->d_gammax_crust_mantle,mp->d_gammay_crust_mantle,mp->d_gammaz_crust_mantle,
+ mp->d_kappavstore_crust_mantle,mp->d_muvstore_crust_mantle,
+ mp->d_kappahstore_crust_mantle,mp->d_muhstore_crust_mantle,
+ mp->d_eta_anisostore_crust_mantle,
+ mp->d_epsilondev_xx_crust_mantle,
+ mp->d_epsilondev_yy_crust_mantle,
+ mp->d_epsilondev_xy_crust_mantle,
+ mp->d_epsilondev_xz_crust_mantle,
+ mp->d_epsilondev_yz_crust_mantle,
+ mp->d_eps_trace_over_3_crust_mantle,
+ mp->d_one_minus_sum_beta_crust_mantle,
+ mp->d_factor_common_crust_mantle,
+ mp->d_R_xx_crust_mantle,
+ mp->d_R_yy_crust_mantle,
+ mp->d_R_xy_crust_mantle,
+ mp->d_R_xz_crust_mantle,
+ mp->d_R_yz_crust_mantle,
+ mp->d_b_epsilondev_xx_crust_mantle,
+ mp->d_b_epsilondev_yy_crust_mantle,
+ mp->d_b_epsilondev_xy_crust_mantle,
+ mp->d_b_epsilondev_xz_crust_mantle,
+ mp->d_b_epsilondev_yz_crust_mantle,
+ mp->d_b_eps_trace_over_3_crust_mantle,
+ mp->d_b_R_xx_crust_mantle,
+ mp->d_b_R_yy_crust_mantle,
+ mp->d_b_R_xy_crust_mantle,
+ mp->d_b_R_xz_crust_mantle,
+ mp->d_b_R_yz_crust_mantle,
+ mp->d_c11store_crust_mantle,mp->d_c12store_crust_mantle,mp->d_c13store_crust_mantle,
+ mp->d_c14store_crust_mantle,mp->d_c15store_crust_mantle,mp->d_c16store_crust_mantle,
+ mp->d_c22store_crust_mantle,mp->d_c23store_crust_mantle,mp->d_c24store_crust_mantle,
+ mp->d_c25store_crust_mantle,mp->d_c26store_crust_mantle,mp->d_c33store_crust_mantle,
+ mp->d_c34store_crust_mantle,mp->d_c35store_crust_mantle,mp->d_c36store_crust_mantle,
+ mp->d_c44store_crust_mantle,mp->d_c45store_crust_mantle,mp->d_c46store_crust_mantle,
+ mp->d_c55store_crust_mantle,mp->d_c56store_crust_mantle,mp->d_c66store_crust_mantle
+ );
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ //double end_time = get_time();
+ //printf("Elapsed time: %e\n",end_time-start_time);
+ exit_on_cuda_error("compute_forces_crust_mantle_cuda");
+#endif
+}
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_inner_core_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_inner_core_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_inner_core_cuda.cu 2012-02-14 15:11:07 UTC (rev 19622)
@@ -0,0 +1,1217 @@
+/*
+ !=====================================================================
+ !
+ ! S p e c f e m 3 D V e r s i o n 2 . 0
+ ! ---------------------------------------
+ !
+ ! Main authors: Dimitri Komatitsch and Jeroen Tromp
+ ! Princeton University, USA and University of Pau / CNRS / INRIA
+ ! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+ ! April 2011
+ !
+ ! This program is free software; you can redistribute it and/or modify
+ ! it under the terms of the GNU General Public License as published by
+ ! the Free Software Foundation; either version 2 of the License, or
+ ! (at your option) any later version.
+ !
+ ! This program is distributed in the hope that it will be useful,
+ ! but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ ! GNU General Public License for more details.
+ !
+ ! You should have received a copy of the GNU General Public License along
+ ! with this program; if not, write to the Free Software Foundation, Inc.,
+ ! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ !
+ !=====================================================================
+ */
+
+#include <stdio.h>
+
+#include <cuda.h>
+#include <cublas.h>
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// elemental routines
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// updates stress
+
+// Subtracts the attenuation memory variables R_* from the stress components
+// of one GLL point (inner core). The zz component has no stored memory
+// variable; it receives +(R_xx + R_yy) instead, mirroring the zero-trace
+// convention used when the zz term is not computed.
+// note: R arrays are indexed as (i_sls,i,j,k,ispec), unlike the local
+// version which uses (i,j,k,ispec,i_sls).
+__device__ void compute_element_ic_att_stress(int tx,int working_element,
+ realw* R_xx,
+ realw* R_yy,
+ realw* R_xy,
+ realw* R_xz,
+ realw* R_yz,
+ reald* sigma_xx,
+ reald* sigma_yy,
+ reald* sigma_zz,
+ reald* sigma_xy,
+ reald* sigma_xz,
+ reald* sigma_yz) {
+
+ // base index of this GLL point; only the SLS index varies in the loop
+ const int base = N_SLS*(tx + NGLL3*working_element);
+
+ for(int i_sls = 0; i_sls < N_SLS; i_sls++){
+ const int idx = base + i_sls;
+
+ reald rxx = R_xx[idx];
+ reald ryy = R_yy[idx];
+
+ *sigma_xx -= rxx;
+ *sigma_yy -= ryy;
+ *sigma_zz += rxx + ryy;
+ *sigma_xy -= R_xy[idx];
+ *sigma_xz -= R_xz[idx];
+ *sigma_yz -= R_yz[idx];
+ }
+ return;
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// updates R_memory
+
+// Marches the attenuation memory variables R_* of one GLL point (inner core)
+// forward in time with the Runge-Kutta update
+//   R_new = alphaval*R_old + betaval*Sn + gammaval*Snp1
+// where Sn/Snp1 are the factor-scaled deviatoric strains at the previous and
+// current time step. The zz term is not stored since the strain is trace-free.
+__device__ void compute_element_ic_att_memory(int tx,int working_element,
+ realw* d_muv,
+ realw* factor_common,
+ realw* alphaval,realw* betaval,realw* gammaval,
+ realw* R_xx,realw* R_yy,realw* R_xy,realw* R_xz,realw* R_yz,
+ realw* epsilondev_xx,realw* epsilondev_yy,realw* epsilondev_xy,
+ realw* epsilondev_xz,realw* epsilondev_yz,
+ reald epsilondev_xx_loc,reald epsilondev_yy_loc,reald epsilondev_xy_loc,
+ reald epsilondev_xz_loc,reald epsilondev_yz_loc
+ ){
+
+ // indices: padded layout for muv, non-padded for the strain arrays
+ const int offset_align = tx + NGLL3_PADDED * working_element;
+ const int ijk_ispec = tx + NGLL3 * working_element;
+
+ reald mul = d_muv[offset_align];
+
+ // component tables (order xx,yy,xy,xz,yz) so the five identical
+ // per-component updates share one loop
+ realw* R_tab[5] = { R_xx, R_yy, R_xy, R_xz, R_yz };
+ realw* eps_tab[5] = { epsilondev_xx, epsilondev_yy, epsilondev_xy,
+ epsilondev_xz, epsilondev_yz };
+ reald eps_loc[5] = { epsilondev_xx_loc, epsilondev_yy_loc, epsilondev_xy_loc,
+ epsilondev_xz_loc, epsilondev_yz_loc };
+
+ // use Runge-Kutta scheme to march in time
+ for(int i_sls = 0; i_sls < N_SLS; i_sls++){
+
+ // note: index for R_xx,... here is (i_sls,i,j,k,ispec) and not
+ // (i,j,k,ispec,i_sls) as in local version
+ const int offset = i_sls + N_SLS*(tx + NGLL3*working_element);
+
+ //mustore(i,j,k,ispec) * factor_common(i_sls,i,j,k,ispec)
+ reald factor_loc = mul * factor_common[offset];
+
+ reald alphaval_loc = alphaval[i_sls]; // (i_sls)
+ reald betaval_loc = betaval[i_sls];
+ reald gammaval_loc = gammaval[i_sls];
+
+ // terms in xx, yy, xy, xz, yz (zz not stored: zero trace)
+ for(int c = 0; c < 5; c++){
+ reald Sn = factor_loc * eps_tab[c][ijk_ispec]; //(i,j,k,ispec)
+ reald Snp1 = factor_loc * eps_loc[c]; //(i,j,k)
+ R_tab[c][offset] = alphaval_loc * R_tab[c][offset]
+ + betaval_loc * Sn + gammaval_loc * Snp1;
+ }
+ }
+ return;
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// pre-computes gravity term
+
+__device__ void compute_element_ic_gravity(int tx,int working_element,
+ int* d_ibool,
+ realw* d_xstore,realw* d_ystore,realw* d_zstore,
+ realw* d_minus_gravity_table,
+ realw* d_minus_deriv_gravity_table,
+ realw* d_density_table,
+ realw* wgll_cube,
+ reald jacobianl,
+ reald* s_dummyx_loc,
+ reald* s_dummyy_loc,
+ reald* s_dummyz_loc,
+ reald* sigma_xx,
+ reald* sigma_yy,
+ reald* sigma_zz,
+ reald* sigma_xy,
+ reald* sigma_yx,
+ reald* sigma_xz,
+ reald* sigma_zx,
+ reald* sigma_yz,
+ reald* sigma_zy,
+ reald* rho_s_H1,
+ reald* rho_s_H2,
+ reald* rho_s_H3){
+
+ // outputs: the nine sigma_* components are updated in place with the
+ // non-symmetric rho*s.g contributions, and rho_s_H1..3 receive the
+ // precomputed gravity term (jacobian * wgll weight * rho * H.s) that the
+ // caller adds to the assembled force sums
+
+ reald radius,theta,phi;
+ reald cos_theta,sin_theta,cos_phi,sin_phi;
+ reald minus_g,minus_dg;
+ reald rho;
+ reald gxl,gyl,gzl;
+ reald minus_g_over_radius,minus_dg_plus_g_over_radius;
+ reald cos_theta_sq,sin_theta_sq,cos_phi_sq,sin_phi_sq;
+ reald Hxxl,Hyyl,Hzzl,Hxyl,Hxzl,Hyzl;
+ reald sx_l,sy_l,sz_l;
+ reald factor;
+
+ // R_EARTH_KM is the radius of the bottom of the oceans
+ const reald R_EARTH = 6371000.0f; // in m
+ const reald R_EARTH_KM = 6371.0f; // in km
+ // uncomment line below for PREM with oceans
+ //const reald R_EARTH = 6368000.0f;
+ //const reald R_EARTH_KM = 6368.0f;
+
+ // compute non-symmetric terms for gravity
+
+ // use mesh coordinates to get theta and phi
+ // x y z contain r theta phi (spherical coordinates stored in the x/y/z arrays)
+ int iglob = d_ibool[working_element*NGLL3 + tx]-1;
+
+ radius = d_xstore[iglob];
+ // make sure radius is never zero even for points at center of cube
+ // because we later divide by radius
+ // (clamps to 100 m, non-dimensionalized by R_EARTH)
+ if(radius < 100.f / R_EARTH){ radius = 100.f / R_EARTH; }
+
+ theta = d_ystore[iglob];
+ phi = d_zstore[iglob];
+
+ cos_theta = cos(theta);
+ sin_theta = sin(theta);
+ cos_phi = cos(phi);
+ sin_phi = sin(phi);
+
+ // for efficiency replace with lookup table every 100 m in radial direction
+ // note: radius has been clamped above so it is never zero (this routine is
+ // used in the inner core, which includes the central cube),
+ // and arrays in C start from 0, thus we need to subtract -1
+ int int_radius = rint(radius * R_EARTH_KM * 10.0f ) - 1;
+ //make sure we never use below zero for point exactly at the center of the Earth
+ if( int_radius < 0 ){int_radius = 0;}
+
+ // get g, rho and dg/dr=dg
+ // spherical components of the gravitational acceleration
+ // for efficiency replace with lookup table every 100 m in radial direction
+ minus_g = d_minus_gravity_table[int_radius];
+ minus_dg = d_minus_deriv_gravity_table[int_radius];
+ rho = d_density_table[int_radius];
+
+ // Cartesian components of the gravitational acceleration
+ gxl = minus_g*sin_theta*cos_phi;
+ gyl = minus_g*sin_theta*sin_phi;
+ gzl = minus_g*cos_theta;
+
+ // Cartesian components of gradient of gravitational acceleration
+ // obtained from spherical components
+
+ minus_g_over_radius = minus_g / radius;
+ minus_dg_plus_g_over_radius = minus_dg - minus_g_over_radius;
+
+ cos_theta_sq = cos_theta*cos_theta;
+ sin_theta_sq = sin_theta*sin_theta;
+ cos_phi_sq = cos_phi*cos_phi;
+ sin_phi_sq = sin_phi*sin_phi;
+
+ Hxxl = minus_g_over_radius*(cos_phi_sq*cos_theta_sq + sin_phi_sq) + cos_phi_sq*minus_dg*sin_theta_sq;
+ Hyyl = minus_g_over_radius*(cos_phi_sq + cos_theta_sq*sin_phi_sq) + minus_dg*sin_phi_sq*sin_theta_sq;
+ Hzzl = cos_theta_sq*minus_dg + minus_g_over_radius*sin_theta_sq;
+ Hxyl = cos_phi*minus_dg_plus_g_over_radius*sin_phi*sin_theta_sq;
+ Hxzl = cos_phi*cos_theta*minus_dg_plus_g_over_radius*sin_theta;
+ Hyzl = cos_theta*minus_dg_plus_g_over_radius*sin_phi*sin_theta;
+
+ // get displacement and multiply by density to compute G tensor
+ // (s_dummy*_loc hold the element's displacement components in shared memory)
+ sx_l = rho * s_dummyx_loc[tx];
+ sy_l = rho * s_dummyy_loc[tx];
+ sz_l = rho * s_dummyz_loc[tx];
+
+ // compute G tensor from s . g and add to sigma (not symmetric)
+ *sigma_xx = *sigma_xx + sy_l*gyl + sz_l*gzl;
+ *sigma_yy = *sigma_yy + sx_l*gxl + sz_l*gzl;
+ *sigma_zz = *sigma_zz + sx_l*gxl + sy_l*gyl;
+
+ *sigma_xy = *sigma_xy - sx_l * gyl;
+ *sigma_yx = *sigma_yx - sy_l * gxl;
+
+ *sigma_xz = *sigma_xz - sx_l * gzl;
+ *sigma_zx = *sigma_zx - sz_l * gxl;
+
+ *sigma_yz = *sigma_yz - sy_l * gzl;
+ *sigma_zy = *sigma_zy - sz_l * gyl;
+
+ // precompute vector
+ factor = jacobianl * wgll_cube[tx];
+ *rho_s_H1 = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl);
+ *rho_s_H2 = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl);
+ *rho_s_H3 = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl);
+
+ return;
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// KERNEL 2
+//
+// for inner_core
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// main computational kernel for the inner-core elastic elements:
+// loads the element displacement into shared memory, computes strains and
+// stresses (isotropic or transversely isotropic, with optional attenuation
+// memory variables and gravity terms), then assembles the force contribution
+// into the global acceleration vector (atomically unless mesh coloring is used)
+__global__ void Kernel_2_inner_core_impl(int nb_blocks_to_compute,
+ int NGLOB,
+ int* d_ibool,
+ int* d_idoubling,
+ int* d_phase_ispec_inner,
+ int num_phase_ispec,
+ int d_iphase,
+ int use_mesh_coloring_gpu,
+ realw* d_displ,
+ realw* d_accel,
+ realw* d_xix, realw* d_xiy, realw* d_xiz,
+ realw* d_etax, realw* d_etay, realw* d_etaz,
+ realw* d_gammax, realw* d_gammay, realw* d_gammaz,
+ realw* d_hprime_xx, realw* d_hprime_yy, realw* d_hprime_zz,
+ realw* d_hprimewgll_xx, realw* d_hprimewgll_yy, realw* d_hprimewgll_zz,
+ realw* d_wgllwgll_xy,realw* d_wgllwgll_xz,realw* d_wgllwgll_yz,
+ realw* d_kappav,
+ realw* d_muv,
+ int COMPUTE_AND_STORE_STRAIN,
+ realw* epsilondev_xx,realw* epsilondev_yy,realw* epsilondev_xy,
+ realw* epsilondev_xz,realw* epsilondev_yz,
+ realw* epsilon_trace_over_3,
+ int SIMULATION_TYPE,
+ int ATTENUATION,
+ int USE_ATTENUATION_MIMIC,
+ realw* one_minus_sum_beta,realw* factor_common,
+ realw* R_xx, realw* R_yy, realw* R_xy, realw* R_xz, realw* R_yz,
+ realw* alphaval,realw* betaval,realw* gammaval,
+ int ANISOTROPY,
+ realw* d_c11store,realw* d_c12store,realw* d_c13store,
+ realw* d_c33store,realw* d_c44store,
+ int GRAVITY,
+ realw* d_xstore,realw* d_ystore,realw* d_zstore,
+ realw* d_minus_gravity_table,
+ realw* d_minus_deriv_gravity_table,
+ realw* d_density_table,
+ realw* wgll_cube){
+
+ /* int bx = blockIdx.y*blockDim.x+blockIdx.x; //possible bug in original code*/
+ // linear block id over the 2D grid (grid is folded into y to respect gridDim.x limits)
+ int bx = blockIdx.y*gridDim.x+blockIdx.x;
+ /* int bx = blockIdx.x; */
+ int tx = threadIdx.x;
+
+ //const int NGLLX = 5;
+ // const int NGLL2 = 25;
+ //const int NGLL3 = NGLL3;
+ const int NGLL3_ALIGN = NGLL3_PADDED;
+ const int IFLAG_IN_FICTITIOUS_CUBE = 11; // from constants.h
+
+ // (I,J,K) GLL indices of this thread within the element
+ int K = (tx/NGLL2);
+ int J = ((tx-K*NGLL2)/NGLLX);
+ int I = (tx-K*NGLL2-J*NGLLX);
+
+ int active,offset;
+ int iglob = 0;
+ int working_element;
+
+ reald tempx1l,tempx2l,tempx3l,tempy1l,tempy2l,tempy3l,tempz1l,tempz2l,tempz3l;
+ reald xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl;
+ reald duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl;
+ reald duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl;
+ reald duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl;
+ reald fac1,fac2,fac3;
+ reald lambdal,mul,lambdalplus2mul,kappal;
+ reald mul_iso,mul_aniso;
+ reald sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz;
+ reald epsilondev_xx_loc,epsilondev_yy_loc,epsilondev_xy_loc,epsilondev_xz_loc,epsilondev_yz_loc;
+ reald c11,c12,c13,c33,c44;
+ reald sum_terms1,sum_terms2,sum_terms3;
+
+ // gravity variables
+ reald sigma_yx,sigma_zx,sigma_zy;
+ reald rho_s_H1,rho_s_H2,rho_s_H3;
+
+#ifndef MANUALLY_UNROLLED_LOOPS
+ int l;
+ realw hp1,hp2,hp3;
+#endif
+
+ __shared__ reald s_dummyx_loc[NGLL3];
+ __shared__ reald s_dummyy_loc[NGLL3];
+ __shared__ reald s_dummyz_loc[NGLL3];
+
+ __shared__ reald s_tempx1[NGLL3];
+ __shared__ reald s_tempx2[NGLL3];
+ __shared__ reald s_tempx3[NGLL3];
+ __shared__ reald s_tempy1[NGLL3];
+ __shared__ reald s_tempy2[NGLL3];
+ __shared__ reald s_tempy3[NGLL3];
+ __shared__ reald s_tempz1[NGLL3];
+ __shared__ reald s_tempz2[NGLL3];
+ __shared__ reald s_tempz3[NGLL3];
+
+// use only NGLL^3 = 125 active threads, plus 3 inactive/ghost threads,
+// because we used memory padding from NGLL^3 = 125 to 128 to get coalescent memory accesses
+ active = (tx < NGLL3 && bx < nb_blocks_to_compute) ? 1:0;
+
+// copy from global memory to shared memory
+// each thread writes one of the NGLL^3 = 125 data points
+ if (active) {
+
+#ifdef USE_MESH_COLORING_GPU
+ working_element = bx;
+#else
+ //mesh coloring
+ if( use_mesh_coloring_gpu ){
+ working_element = bx;
+ }else{
+ // iphase-1 and working_element-1 for Fortran->C array conventions
+ working_element = d_phase_ispec_inner[bx + num_phase_ispec*(d_iphase-1)]-1;
+ }
+#endif
+
+ // exclude fictitious elements in central cube
+ if( d_idoubling[working_element] == IFLAG_IN_FICTITIOUS_CUBE ){
+ active = 0;
+ }else{
+ // iglob = d_ibool[working_element*NGLL3_ALIGN + tx]-1;
+ iglob = d_ibool[working_element*NGLL3 + tx]-1;
+
+#ifdef USE_TEXTURES
+ // NOTE(review): this legacy texture path uses a [iglob + i*NGLOB] component
+ // layout while the default path below uses interleaved [iglob*3 + i] --
+ // confirm the displ texture binding before enabling USE_TEXTURES
+ s_dummyx_loc[tx] = tex1Dfetch(tex_displ, iglob);
+ s_dummyy_loc[tx] = tex1Dfetch(tex_displ, iglob + NGLOB);
+ s_dummyz_loc[tx] = tex1Dfetch(tex_displ, iglob + 2*NGLOB);
+#else
+ // changing iglob indexing to match fortran row changes fast style
+ s_dummyx_loc[tx] = d_displ[iglob*3];
+ s_dummyy_loc[tx] = d_displ[iglob*3 + 1];
+ s_dummyz_loc[tx] = d_displ[iglob*3 + 2];
+#endif
+ }
+ }
+
+// synchronize all the threads (one thread for each of the NGLL grid points of the
+// current spectral element) because we need the whole element to be ready in order
+// to be able to compute the matrix products along cut planes of the 3D element below
+ __syncthreads();
+
+#ifndef MAKE_KERNEL2_BECOME_STUPID_FOR_TESTS
+
+ if (active) {
+
+#ifndef MANUALLY_UNROLLED_LOOPS
+
+ tempx1l = 0.f;
+ tempx2l = 0.f;
+ tempx3l = 0.f;
+
+ tempy1l = 0.f;
+ tempy2l = 0.f;
+ tempy3l = 0.f;
+
+ tempz1l = 0.f;
+ tempz2l = 0.f;
+ tempz3l = 0.f;
+
+ for (l=0;l<NGLLX;l++) {
+ hp1 = d_hprime_xx[l*NGLLX+I];
+ offset = K*NGLL2+J*NGLLX+l;
+ tempx1l += s_dummyx_loc[offset]*hp1;
+ tempy1l += s_dummyy_loc[offset]*hp1;
+ tempz1l += s_dummyz_loc[offset]*hp1;
+
+ hp2 = d_hprime_xx[l*NGLLX+J];
+ offset = K*NGLL2+l*NGLLX+I;
+ tempx2l += s_dummyx_loc[offset]*hp2;
+ tempy2l += s_dummyy_loc[offset]*hp2;
+ tempz2l += s_dummyz_loc[offset]*hp2;
+
+ hp3 = d_hprime_xx[l*NGLLX+K];
+ offset = l*NGLL2+J*NGLLX+I;
+ tempx3l += s_dummyx_loc[offset]*hp3;
+ tempy3l += s_dummyy_loc[offset]*hp3;
+ tempz3l += s_dummyz_loc[offset]*hp3;
+
+ }
+#else
+
+ tempx1l = s_dummyx_loc[K*NGLL2+J*NGLLX]*d_hprime_xx[I]
+ + s_dummyx_loc[K*NGLL2+J*NGLLX+1]*d_hprime_xx[NGLLX+I]
+ + s_dummyx_loc[K*NGLL2+J*NGLLX+2]*d_hprime_xx[2*NGLLX+I]
+ + s_dummyx_loc[K*NGLL2+J*NGLLX+3]*d_hprime_xx[3*NGLLX+I]
+ + s_dummyx_loc[K*NGLL2+J*NGLLX+4]*d_hprime_xx[4*NGLLX+I];
+
+ tempy1l = s_dummyy_loc[K*NGLL2+J*NGLLX]*d_hprime_xx[I]
+ + s_dummyy_loc[K*NGLL2+J*NGLLX+1]*d_hprime_xx[NGLLX+I]
+ + s_dummyy_loc[K*NGLL2+J*NGLLX+2]*d_hprime_xx[2*NGLLX+I]
+ + s_dummyy_loc[K*NGLL2+J*NGLLX+3]*d_hprime_xx[3*NGLLX+I]
+ + s_dummyy_loc[K*NGLL2+J*NGLLX+4]*d_hprime_xx[4*NGLLX+I];
+
+ tempz1l = s_dummyz_loc[K*NGLL2+J*NGLLX]*d_hprime_xx[I]
+ + s_dummyz_loc[K*NGLL2+J*NGLLX+1]*d_hprime_xx[NGLLX+I]
+ + s_dummyz_loc[K*NGLL2+J*NGLLX+2]*d_hprime_xx[2*NGLLX+I]
+ + s_dummyz_loc[K*NGLL2+J*NGLLX+3]*d_hprime_xx[3*NGLLX+I]
+ + s_dummyz_loc[K*NGLL2+J*NGLLX+4]*d_hprime_xx[4*NGLLX+I];
+
+ tempx2l = s_dummyx_loc[K*NGLL2+I]*d_hprime_xx[J]
+ + s_dummyx_loc[K*NGLL2+NGLLX+I]*d_hprime_xx[NGLLX+J]
+ + s_dummyx_loc[K*NGLL2+2*NGLLX+I]*d_hprime_xx[2*NGLLX+J]
+ + s_dummyx_loc[K*NGLL2+3*NGLLX+I]*d_hprime_xx[3*NGLLX+J]
+ + s_dummyx_loc[K*NGLL2+4*NGLLX+I]*d_hprime_xx[4*NGLLX+J];
+
+ tempy2l = s_dummyy_loc[K*NGLL2+I]*d_hprime_xx[J]
+ + s_dummyy_loc[K*NGLL2+NGLLX+I]*d_hprime_xx[NGLLX+J]
+ + s_dummyy_loc[K*NGLL2+2*NGLLX+I]*d_hprime_xx[2*NGLLX+J]
+ + s_dummyy_loc[K*NGLL2+3*NGLLX+I]*d_hprime_xx[3*NGLLX+J]
+ + s_dummyy_loc[K*NGLL2+4*NGLLX+I]*d_hprime_xx[4*NGLLX+J];
+
+ tempz2l = s_dummyz_loc[K*NGLL2+I]*d_hprime_xx[J]
+ + s_dummyz_loc[K*NGLL2+NGLLX+I]*d_hprime_xx[NGLLX+J]
+ + s_dummyz_loc[K*NGLL2+2*NGLLX+I]*d_hprime_xx[2*NGLLX+J]
+ + s_dummyz_loc[K*NGLL2+3*NGLLX+I]*d_hprime_xx[3*NGLLX+J]
+ + s_dummyz_loc[K*NGLL2+4*NGLLX+I]*d_hprime_xx[4*NGLLX+J];
+
+ tempx3l = s_dummyx_loc[J*NGLLX+I]*d_hprime_xx[K]
+ + s_dummyx_loc[NGLL2+J*NGLLX+I]*d_hprime_xx[NGLLX+K]
+ + s_dummyx_loc[2*NGLL2+J*NGLLX+I]*d_hprime_xx[2*NGLLX+K]
+ + s_dummyx_loc[3*NGLL2+J*NGLLX+I]*d_hprime_xx[3*NGLLX+K]
+ + s_dummyx_loc[4*NGLL2+J*NGLLX+I]*d_hprime_xx[4*NGLLX+K];
+
+ tempy3l = s_dummyy_loc[J*NGLLX+I]*d_hprime_xx[K]
+ + s_dummyy_loc[NGLL2+J*NGLLX+I]*d_hprime_xx[NGLLX+K]
+ + s_dummyy_loc[2*NGLL2+J*NGLLX+I]*d_hprime_xx[2*NGLLX+K]
+ + s_dummyy_loc[3*NGLL2+J*NGLLX+I]*d_hprime_xx[3*NGLLX+K]
+ + s_dummyy_loc[4*NGLL2+J*NGLLX+I]*d_hprime_xx[4*NGLLX+K];
+
+ tempz3l = s_dummyz_loc[J*NGLLX+I]*d_hprime_xx[K]
+ + s_dummyz_loc[NGLL2+J*NGLLX+I]*d_hprime_xx[NGLLX+K]
+ + s_dummyz_loc[2*NGLL2+J*NGLLX+I]*d_hprime_xx[2*NGLLX+K]
+ + s_dummyz_loc[3*NGLL2+J*NGLLX+I]*d_hprime_xx[3*NGLLX+K]
+ + s_dummyz_loc[4*NGLL2+J*NGLLX+I]*d_hprime_xx[4*NGLLX+K];
+
+#endif
+
+// compute derivatives of ux, uy and uz with respect to x, y and z
+ offset = working_element*NGLL3_ALIGN + tx;
+
+ xixl = d_xix[offset];
+ xiyl = d_xiy[offset];
+ xizl = d_xiz[offset];
+ etaxl = d_etax[offset];
+ etayl = d_etay[offset];
+ etazl = d_etaz[offset];
+ gammaxl = d_gammax[offset];
+ gammayl = d_gammay[offset];
+ gammazl = d_gammaz[offset];
+
+ duxdxl = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l;
+ duxdyl = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l;
+ duxdzl = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l;
+
+ duydxl = xixl*tempy1l + etaxl*tempy2l + gammaxl*tempy3l;
+ duydyl = xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l;
+ duydzl = xizl*tempy1l + etazl*tempy2l + gammazl*tempy3l;
+
+ duzdxl = xixl*tempz1l + etaxl*tempz2l + gammaxl*tempz3l;
+ duzdyl = xiyl*tempz1l + etayl*tempz2l + gammayl*tempz3l;
+ duzdzl = xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l;
+
+ // precompute some sums to save CPU time
+ duxdxl_plus_duydyl = duxdxl + duydyl;
+ duxdxl_plus_duzdzl = duxdxl + duzdzl;
+ duydyl_plus_duzdzl = duydyl + duzdzl;
+ duxdyl_plus_duydxl = duxdyl + duydxl;
+ duzdxl_plus_duxdzl = duzdxl + duxdzl;
+ duzdyl_plus_duydzl = duzdyl + duydzl;
+
+ // computes deviatoric strain attenuation and/or for kernel calculations
+ if(COMPUTE_AND_STORE_STRAIN) {
+ realw templ = 0.33333333333333333333f * (duxdxl + duydyl + duzdzl); // 1./3. = 0.33333
+
+ // local storage: stresses at this current time step
+ epsilondev_xx_loc = duxdxl - templ;
+ epsilondev_yy_loc = duydyl - templ;
+ epsilondev_xy_loc = 0.5f * duxdyl_plus_duydxl;
+ epsilondev_xz_loc = 0.5f * duzdxl_plus_duxdzl;
+ epsilondev_yz_loc = 0.5f * duzdyl_plus_duydzl;
+
+ if(SIMULATION_TYPE == 3) {
+ epsilon_trace_over_3[tx + working_element*NGLL3] = templ;
+ }
+ }
+
+ // compute elements with an elastic isotropic rheology
+ kappal = d_kappav[offset];
+ mul = d_muv[offset];
+
+ // attenuation
+ if(ATTENUATION){
+ // use unrelaxed parameters if attenuation
+ mul_iso = mul * one_minus_sum_beta[tx+working_element*NGLL3]; // (i,j,k,ispec)
+ mul_aniso = mul *( one_minus_sum_beta[tx+working_element*NGLL3] - 1.0f );
+ }else{
+ mul_iso = mul;
+ }
+
+ // full anisotropic case, stress calculations
+ if(ANISOTROPY){
+
+ // elastic tensor for hexagonal symmetry in reduced notation:
+ //
+ // c11 c12 c13 0 0 0
+ // c12 c11 c13 0 0 0
+ // c13 c13 c33 0 0 0
+ // 0 0 0 c44 0 0
+ // 0 0 0 0 c44 0
+ // 0 0 0 0 0 (c11-c12)/2
+ //
+ // in terms of the A, C, L, N and F of Love (1927):
+ //
+ // c11 = A
+ // c12 = A-2N
+ // c13 = F
+ // c33 = C
+ // c44 = L
+
+ c11 = d_c11store[offset];
+ c12 = d_c12store[offset];
+ c13 = d_c13store[offset];
+ c33 = d_c33store[offset];
+ c44 = d_c44store[offset];
+
+ // use unrelaxed parameters if attenuation
+ if( ATTENUATION){
+ c11 = c11 + 1.33333333333333333333f * mul_aniso; // FOUR_THIRDS = 1.33333
+ c12 = c12 - 0.66666666666666666666f * mul_aniso; // TWO_THIRDS = 0.66666666666666666666f
+ c13 = c13 - 0.66666666666666666666f * mul_aniso;
+ c33 = c33 + 1.33333333333333333333f * mul_aniso;
+ c44 = c44 + mul_aniso;
+ }
+
+ sigma_xx = c11*duxdxl + c12*duydyl + c13*duzdzl;
+ sigma_yy = c12*duxdxl + c11*duydyl + c13*duzdzl;
+ sigma_zz = c13*duxdxl + c13*duydyl + c33*duzdzl;
+ sigma_xy = 0.5f*(c11-c12)*duxdyl_plus_duydxl;
+ sigma_xz = c44*duzdxl_plus_duxdzl;
+ sigma_yz = c44*duzdyl_plus_duydzl;
+
+ }else{
+
+ // isotropic case
+
+ lambdalplus2mul = kappal + 1.33333333333333333333f * mul_iso; // 4./3. = 1.3333333
+ lambdal = lambdalplus2mul - 2.0f * mul_iso;
+
+ // compute the six components of the stress tensor sigma
+ sigma_xx = lambdalplus2mul*duxdxl + lambdal*duydyl_plus_duzdzl;
+ sigma_yy = lambdalplus2mul*duydyl + lambdal*duxdxl_plus_duzdzl;
+ sigma_zz = lambdalplus2mul*duzdzl + lambdal*duxdxl_plus_duydyl;
+
+ sigma_xy = mul*duxdyl_plus_duydxl;
+ sigma_xz = mul*duzdxl_plus_duxdzl;
+ sigma_yz = mul*duzdyl_plus_duydzl;
+ }
+
+ if(ATTENUATION && ( ! USE_ATTENUATION_MIMIC ) ){
+ // subtracts memory variables if attenuation
+ compute_element_ic_att_stress(tx,working_element,
+ R_xx,R_yy,R_xy,R_xz,R_yz,
+ &sigma_xx,&sigma_yy,&sigma_zz,&sigma_xy,&sigma_xz,&sigma_yz);
+ }
+
+ // define symmetric components (needed for non-symmetric dot product and sigma for gravity)
+ sigma_yx = sigma_xy;
+ sigma_zx = sigma_xz;
+ sigma_zy = sigma_yz;
+
+ // jacobian
+ jacobianl = 1.0f / (xixl*(etayl*gammazl-etazl*gammayl)
+ -xiyl*(etaxl*gammazl-etazl*gammaxl)
+ +xizl*(etaxl*gammayl-etayl*gammaxl));
+
+ if( GRAVITY ){
+ // computes non-symmetric terms for gravity
+ compute_element_ic_gravity(tx,working_element,
+ d_ibool,d_xstore,d_ystore,d_zstore,
+ d_minus_gravity_table,d_minus_deriv_gravity_table,d_density_table,
+ wgll_cube,jacobianl,
+ s_dummyx_loc,s_dummyy_loc,s_dummyz_loc,
+ &sigma_xx,&sigma_yy,&sigma_zz,&sigma_xy,&sigma_yx,
+ &sigma_xz,&sigma_zx,&sigma_yz,&sigma_zy,
+ &rho_s_H1,&rho_s_H2,&rho_s_H3);
+ }
+
+ // form dot product with test vector, non-symmetric form
+ s_tempx1[tx] = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl);
+ s_tempy1[tx] = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl);
+ s_tempz1[tx] = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl);
+
+ s_tempx2[tx] = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl);
+ s_tempy2[tx] = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl);
+ s_tempz2[tx] = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl);
+
+ s_tempx3[tx] = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl);
+ s_tempy3[tx] = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl);
+ s_tempz3[tx] = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl);
+
+ }
+
+// synchronize all the threads (one thread for each of the NGLL grid points of the
+// current spectral element) because we need the whole element to be ready in order
+// to be able to compute the matrix products along cut planes of the 3D element below
+ __syncthreads();
+
+ if (active) {
+
+#ifndef MANUALLY_UNROLLED_LOOPS
+
+ tempx1l = 0.f;
+ tempy1l = 0.f;
+ tempz1l = 0.f;
+
+ tempx2l = 0.f;
+ tempy2l = 0.f;
+ tempz2l = 0.f;
+
+ tempx3l = 0.f;
+ tempy3l = 0.f;
+ tempz3l = 0.f;
+
+ for (l=0;l<NGLLX;l++) {
+
+ fac1 = d_hprimewgll_xx[I*NGLLX+l];
+ offset = K*NGLL2+J*NGLLX+l;
+ tempx1l += s_tempx1[offset]*fac1;
+ tempy1l += s_tempy1[offset]*fac1;
+ tempz1l += s_tempz1[offset]*fac1;
+
+ fac2 = d_hprimewgll_yy[J*NGLLX+l];
+ offset = K*NGLL2+l*NGLLX+I;
+ tempx2l += s_tempx2[offset]*fac2;
+ tempy2l += s_tempy2[offset]*fac2;
+ tempz2l += s_tempz2[offset]*fac2;
+
+ fac3 = d_hprimewgll_zz[K*NGLLX+l];
+ offset = l*NGLL2+J*NGLLX+I;
+ tempx3l += s_tempx3[offset]*fac3;
+ tempy3l += s_tempy3[offset]*fac3;
+ tempz3l += s_tempz3[offset]*fac3;
+
+ }
+#else
+
+ tempx1l = s_tempx1[K*NGLL2+J*NGLLX]*d_hprimewgll_xx[I*NGLLX]
+ + s_tempx1[K*NGLL2+J*NGLLX+1]*d_hprimewgll_xx[I*NGLLX+1]
+ + s_tempx1[K*NGLL2+J*NGLLX+2]*d_hprimewgll_xx[I*NGLLX+2]
+ + s_tempx1[K*NGLL2+J*NGLLX+3]*d_hprimewgll_xx[I*NGLLX+3]
+ + s_tempx1[K*NGLL2+J*NGLLX+4]*d_hprimewgll_xx[I*NGLLX+4];
+
+ tempy1l = s_tempy1[K*NGLL2+J*NGLLX]*d_hprimewgll_xx[I*NGLLX]
+ + s_tempy1[K*NGLL2+J*NGLLX+1]*d_hprimewgll_xx[I*NGLLX+1]
+ + s_tempy1[K*NGLL2+J*NGLLX+2]*d_hprimewgll_xx[I*NGLLX+2]
+ + s_tempy1[K*NGLL2+J*NGLLX+3]*d_hprimewgll_xx[I*NGLLX+3]
+ + s_tempy1[K*NGLL2+J*NGLLX+4]*d_hprimewgll_xx[I*NGLLX+4];
+
+ tempz1l = s_tempz1[K*NGLL2+J*NGLLX]*d_hprimewgll_xx[I*NGLLX]
+ + s_tempz1[K*NGLL2+J*NGLLX+1]*d_hprimewgll_xx[I*NGLLX+1]
+ + s_tempz1[K*NGLL2+J*NGLLX+2]*d_hprimewgll_xx[I*NGLLX+2]
+ + s_tempz1[K*NGLL2+J*NGLLX+3]*d_hprimewgll_xx[I*NGLLX+3]
+ + s_tempz1[K*NGLL2+J*NGLLX+4]*d_hprimewgll_xx[I*NGLLX+4];
+
+ tempx2l = s_tempx2[K*NGLL2+I]*d_hprimewgll_yy[J*NGLLX]
+ + s_tempx2[K*NGLL2+NGLLX+I]*d_hprimewgll_yy[J*NGLLX+1]
+ + s_tempx2[K*NGLL2+2*NGLLX+I]*d_hprimewgll_yy[J*NGLLX+2]
+ + s_tempx2[K*NGLL2+3*NGLLX+I]*d_hprimewgll_yy[J*NGLLX+3]
+ + s_tempx2[K*NGLL2+4*NGLLX+I]*d_hprimewgll_yy[J*NGLLX+4];
+
+ tempy2l = s_tempy2[K*NGLL2+I]*d_hprimewgll_yy[J*NGLLX]
+ + s_tempy2[K*NGLL2+NGLLX+I]*d_hprimewgll_yy[J*NGLLX+1]
+ + s_tempy2[K*NGLL2+2*NGLLX+I]*d_hprimewgll_yy[J*NGLLX+2]
+ + s_tempy2[K*NGLL2+3*NGLLX+I]*d_hprimewgll_yy[J*NGLLX+3]
+ + s_tempy2[K*NGLL2+4*NGLLX+I]*d_hprimewgll_yy[J*NGLLX+4];
+
+ tempz2l = s_tempz2[K*NGLL2+I]*d_hprimewgll_yy[J*NGLLX]
+ + s_tempz2[K*NGLL2+NGLLX+I]*d_hprimewgll_yy[J*NGLLX+1]
+ + s_tempz2[K*NGLL2+2*NGLLX+I]*d_hprimewgll_yy[J*NGLLX+2]
+ + s_tempz2[K*NGLL2+3*NGLLX+I]*d_hprimewgll_yy[J*NGLLX+3]
+ + s_tempz2[K*NGLL2+4*NGLLX+I]*d_hprimewgll_yy[J*NGLLX+4];
+
+ tempx3l = s_tempx3[J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX]
+ + s_tempx3[NGLL2+J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX+1]
+ + s_tempx3[2*NGLL2+J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX+2]
+ + s_tempx3[3*NGLL2+J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX+3]
+ + s_tempx3[4*NGLL2+J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX+4];
+
+ tempy3l = s_tempy3[J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX]
+ + s_tempy3[NGLL2+J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX+1]
+ + s_tempy3[2*NGLL2+J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX+2]
+ + s_tempy3[3*NGLL2+J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX+3]
+ + s_tempy3[4*NGLL2+J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX+4];
+
+ tempz3l = s_tempz3[J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX]
+ + s_tempz3[NGLL2+J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX+1]
+ + s_tempz3[2*NGLL2+J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX+2]
+ + s_tempz3[3*NGLL2+J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX+3]
+ + s_tempz3[4*NGLL2+J*NGLLX+I]*d_hprimewgll_zz[K*NGLLX+4];
+
+#endif
+
+ fac1 = d_wgllwgll_yz[K*NGLLX+J];
+ fac2 = d_wgllwgll_xz[K*NGLLX+I];
+ fac3 = d_wgllwgll_xy[J*NGLLX+I];
+
+ sum_terms1 = - (fac1*tempx1l + fac2*tempx2l + fac3*tempx3l);
+ sum_terms2 = - (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l);
+ sum_terms3 = - (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
+
+ // adds gravity term
+ if( GRAVITY ){
+ sum_terms1 += rho_s_H1;
+ sum_terms2 += rho_s_H2;
+ sum_terms3 += rho_s_H3;
+ }
+
+#ifdef USE_TEXTURES
+ // note: fixed unbalanced parentheses in this legacy path (stray ')' at end
+ // of each statement); NOTE(review): this non-atomic read-modify-write also
+ // races between elements sharing global points -- confirm before enabling
+ d_accel[iglob] = tex1Dfetch(tex_accel, iglob) + sum_terms1;
+ d_accel[iglob + NGLOB] = tex1Dfetch(tex_accel, iglob + NGLOB) + sum_terms2;
+ d_accel[iglob + 2*NGLOB] = tex1Dfetch(tex_accel, iglob + 2*NGLOB) + sum_terms3;
+#else
+ /* OLD/To be implemented version that uses coloring to get around race condition. About 1.6x faster */
+
+
+#ifdef USE_MESH_COLORING_GPU
+ // no atomic operation needed, colors don't share global points between elements
+ d_accel[iglob*3] += sum_terms1;
+ d_accel[iglob*3 + 1] += sum_terms2;
+ d_accel[iglob*3 + 2] += sum_terms3;
+#else
+ //mesh coloring
+ if( use_mesh_coloring_gpu ){
+
+ // no atomic operation needed, colors don't share global points between elements
+ d_accel[iglob*3] += sum_terms1;
+ d_accel[iglob*3 + 1] += sum_terms2;
+ d_accel[iglob*3 + 2] += sum_terms3;
+
+ }else{
+
+ // for testing purposes only: w/out atomic updates
+ //d_accel[iglob*3] -= (0.00000001f*tempx1l + 0.00000001f*tempx2l + 0.00000001f*tempx3l);
+ //d_accel[iglob*3 + 1] -= (0.00000001f*tempy1l + 0.00000001f*tempy2l + 0.00000001f*tempy3l);
+ //d_accel[iglob*3 + 2] -= (0.00000001f*tempz1l + 0.00000001f*tempz2l + 0.00000001f*tempz3l);
+
+ atomicAdd(&d_accel[iglob*3], sum_terms1);
+ atomicAdd(&d_accel[iglob*3+1], sum_terms2);
+ atomicAdd(&d_accel[iglob*3+2], sum_terms3);
+
+ }
+#endif
+
+#endif
+
+ // update memory variables based upon the Runge-Kutta scheme
+ if( ATTENUATION && ! USE_ATTENUATION_MIMIC ){
+ compute_element_ic_att_memory(tx,working_element,
+ d_muv,
+ factor_common,alphaval,betaval,gammaval,
+ R_xx,R_yy,R_xy,R_xz,R_yz,
+ epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz,
+ epsilondev_xx_loc,epsilondev_yy_loc,epsilondev_xy_loc,epsilondev_xz_loc,epsilondev_yz_loc);
+ }
+
+ // save deviatoric strain for Runge-Kutta scheme
+ if( COMPUTE_AND_STORE_STRAIN ){
+ int ijk_ispec = tx + working_element*NGLL3;
+
+ // fortran: epsilondev_xx(:,:,:,ispec) = epsilondev_xx_loc(:,:,:)
+ epsilondev_xx[ijk_ispec] = epsilondev_xx_loc;
+ epsilondev_yy[ijk_ispec] = epsilondev_yy_loc;
+ epsilondev_xy[ijk_ispec] = epsilondev_xy_loc;
+ epsilondev_xz[ijk_ispec] = epsilondev_xz_loc;
+ epsilondev_yz[ijk_ispec] = epsilondev_yz_loc;
+ }
+
+ }
+
+#else // of #ifndef MAKE_KERNEL2_BECOME_STUPID_FOR_TESTS
+ d_accel[iglob] -= 0.00000001f;
+ d_accel[iglob + NGLOB] -= 0.00000001f;
+ d_accel[iglob + 2*NGLOB] -= 0.00000001f;
+#endif // of #ifndef MAKE_KERNEL2_BECOME_STUPID_FOR_TESTS
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// host-side wrapper: configures the launch grid and invokes
+// Kernel_2_inner_core_impl for the forward wavefield, and additionally for the
+// backward/reconstructed (b_*) wavefield when simulation_type == 3
+void Kernel_2_inner_core(int nb_blocks_to_compute,Mesh* mp,
+ int d_iphase,
+ int* d_ibool,
+ int* d_idoubling,
+ realw* d_xix,realw* d_xiy,realw* d_xiz,
+ realw* d_etax,realw* d_etay,realw* d_etaz,
+ realw* d_gammax,realw* d_gammay,realw* d_gammaz,
+ realw* d_kappav,
+ realw* d_muv,
+ realw* d_epsilondev_xx,
+ realw* d_epsilondev_yy,
+ realw* d_epsilondev_xy,
+ realw* d_epsilondev_xz,
+ realw* d_epsilondev_yz,
+ realw* d_epsilon_trace_over_3,
+ realw* d_one_minus_sum_beta,
+ realw* d_factor_common,
+ realw* d_R_xx,
+ realw* d_R_yy,
+ realw* d_R_xy,
+ realw* d_R_xz,
+ realw* d_R_yz,
+ realw* d_b_epsilondev_xx,
+ realw* d_b_epsilondev_yy,
+ realw* d_b_epsilondev_xy,
+ realw* d_b_epsilondev_xz,
+ realw* d_b_epsilondev_yz,
+ realw* d_b_epsilon_trace_over_3,
+ realw* d_b_R_xx,
+ realw* d_b_R_yy,
+ realw* d_b_R_xy,
+ realw* d_b_R_xz,
+ realw* d_b_R_yz,
+ realw* d_c11store,realw* d_c12store,realw* d_c13store,
+ realw* d_c33store,realw* d_c44store
+ ){
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("before kernel Kernel_2_inner_core");
+#endif
+
+ /* if the grid can handle the number of blocks, we let it be 1D */
+ /* grid_2_x = nb_elem_color; */
+ /* nb_elem_color is just how many blocks we are computing now */
+
+ // folds the 1D element count into a 2D grid so the x-dimension stays
+ // below the 65535 grid-size limit; the kernel linearizes it back via
+ // blockIdx.y*gridDim.x+blockIdx.x (excess ghost blocks are deactivated there)
+ int num_blocks_x = nb_blocks_to_compute;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ // one thread per (padded) GLL point of an element
+ int blocksize = NGLL3_PADDED;
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ // Cuda timing
+ // cudaEvent_t start, stop;
+ // realw time;
+ // cudaEventCreate(&start);
+ // cudaEventCreate(&stop);
+ // cudaEventRecord( start, 0 );
+
+ // forward wavefield
+ Kernel_2_inner_core_impl<<<grid,threads>>>(nb_blocks_to_compute,
+ mp->NGLOB_AB,
+ d_ibool,
+ d_idoubling,
+ mp->d_phase_ispec_inner_inner_core,
+ mp->num_phase_ispec_inner_core,
+ d_iphase,
+ mp->use_mesh_coloring_gpu,
+ mp->d_displ_inner_core,
+ mp->d_accel_inner_core,
+ d_xix, d_xiy, d_xiz,
+ d_etax, d_etay, d_etaz,
+ d_gammax, d_gammay, d_gammaz,
+ mp->d_hprime_xx, mp->d_hprime_yy, mp->d_hprime_zz,
+ mp->d_hprimewgll_xx, mp->d_hprimewgll_yy, mp->d_hprimewgll_zz,
+ mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
+ d_kappav, d_muv,
+ mp->compute_and_store_strain,
+ d_epsilondev_xx,
+ d_epsilondev_yy,
+ d_epsilondev_xy,
+ d_epsilondev_xz,
+ d_epsilondev_yz,
+ d_epsilon_trace_over_3,
+ mp->simulation_type,
+ mp->attenuation,
+ mp->use_attenuation_mimic,
+ d_one_minus_sum_beta,
+ d_factor_common,
+ d_R_xx,d_R_yy,d_R_xy,d_R_xz,d_R_yz,
+ mp->d_alphaval,mp->d_betaval,mp->d_gammaval,
+ mp->anisotropic_inner_core,
+ d_c11store,d_c12store,d_c13store,
+ d_c33store,d_c44store,
+ mp->gravity,
+ mp->d_xstore_inner_core,mp->d_ystore_inner_core,mp->d_zstore_inner_core,
+ mp->d_minus_gravity_table,
+ mp->d_minus_deriv_gravity_table,
+ mp->d_density_table,
+ mp->d_wgll_cube);
+
+
+ // backward/reconstructed wavefield: same kernel on the b_* arrays (adjoint runs)
+ if(mp->simulation_type == 3) {
+ Kernel_2_inner_core_impl<<< grid,threads>>>(nb_blocks_to_compute,
+ mp->NGLOB_AB,
+ d_ibool,
+ d_idoubling,
+ mp->d_phase_ispec_inner_inner_core,
+ mp->num_phase_ispec_inner_core,
+ d_iphase,
+ mp->use_mesh_coloring_gpu,
+ mp->d_b_displ_inner_core,
+ mp->d_b_accel_inner_core,
+ d_xix, d_xiy, d_xiz,
+ d_etax, d_etay, d_etaz,
+ d_gammax, d_gammay, d_gammaz,
+ mp->d_hprime_xx, mp->d_hprime_yy, mp->d_hprime_zz,
+ mp->d_hprimewgll_xx, mp->d_hprimewgll_yy, mp->d_hprimewgll_zz,
+ mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
+ d_kappav, d_muv,
+ mp->compute_and_store_strain,
+ d_b_epsilondev_xx,
+ d_b_epsilondev_yy,
+ d_b_epsilondev_xy,
+ d_b_epsilondev_xz,
+ d_b_epsilondev_yz,
+ d_b_epsilon_trace_over_3,
+ mp->simulation_type,
+ mp->attenuation,
+ mp->use_attenuation_mimic,
+ d_one_minus_sum_beta,
+ d_factor_common,
+ d_b_R_xx,d_b_R_yy,d_b_R_xy,d_b_R_xz,d_b_R_yz,
+ mp->d_b_alphaval,mp->d_b_betaval,mp->d_b_gammaval,
+ mp->anisotropic_inner_core,
+ d_c11store,d_c12store,d_c13store,
+ d_c33store,d_c44store,
+ mp->gravity,
+ mp->d_xstore_inner_core,mp->d_ystore_inner_core,mp->d_zstore_inner_core,
+ mp->d_minus_gravity_table,
+ mp->d_minus_deriv_gravity_table,
+ mp->d_density_table,
+ mp->d_wgll_cube);
+ }
+
+ // cudaEventRecord( stop, 0 );
+ // cudaEventSynchronize( stop );
+ // cudaEventElapsedTime( &time, start, stop );
+ // cudaEventDestroy( start );
+ // cudaEventDestroy( stop );
+ // printf("Kernel2 Execution Time: %f ms\n",time);
+
+ /* cudaThreadSynchronize(); */
+ /* LOG("Kernel 2 finished"); */
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("Kernel_2_inner_core");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+// Fortran-callable entry point: computes the elastic forces for the inner-core
+// elements of the given phase (iphase == 1: outer elements, else inner).
+// With mesh coloring enabled it launches one kernel per color with running
+// array offsets; otherwise a single launch using atomic updates.
+extern "C"
+void FC_FUNC_(compute_forces_inner_core_cuda,
+ COMPUTE_FORCES_INNER_CORE_CUDA)(long* Mesh_pointer_f,
+ int* iphase) {
+
+ TRACE("compute_forces_inner_core_cuda");
+
+//daniel: debug
+ //printf("Running compute_forces_inner_core_cuda\n");
+ //double start_time = get_time();
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
+
+ int num_elements;
+
+ if( *iphase == 1 )
+ num_elements = mp->nspec_outer_inner_core;
+ else
+ num_elements = mp->nspec_inner_inner_core;
+
+ // checks if anything to do
+ if( num_elements == 0 ) return;
+
+ // mesh coloring
+ if( mp->use_mesh_coloring_gpu ){
+
+ // note: array offsets require sorted arrays, such that e.g. ibool starts with elastic elements
+ // and followed by acoustic ones.
+ // elastic elements also start with outer than inner element ordering
+
+ int nb_colors,nb_blocks_to_compute;
+ int istart;
+ // separate offsets because arrays use different strides:
+ // padded (NGLL3_PADDED), non-padded (NGLL3), attenuation (NGLL3*N_SLS), per-element
+ int color_offset,color_offset_nonpadded,color_offset_nonpadded_att2;
+ int color_offset_ispec;
+
+ // sets up color loop
+ if( *iphase == 1 ){
+ // outer elements
+ nb_colors = mp->num_colors_outer_inner_core;
+ istart = 0;
+
+ // array offsets
+ color_offset = 0;
+ color_offset_nonpadded = 0;
+ color_offset_nonpadded_att2 = 0;
+ color_offset_ispec = 0;
+ }else{
+ // inner elements (start after outer elements)
+ nb_colors = mp->num_colors_outer_inner_core + mp->num_colors_inner_inner_core;
+ istart = mp->num_colors_outer_inner_core;
+
+ // array offsets
+ color_offset = (mp->nspec_outer_inner_core) * NGLL3_PADDED;
+ color_offset_nonpadded = (mp->nspec_outer_inner_core) * NGLL3;
+ color_offset_nonpadded_att2 = (mp->nspec_outer_inner_core) * NGLL3 * N_SLS;
+ color_offset_ispec = mp->nspec_outer_inner_core;
+ }
+
+ // loops over colors
+ for(int icolor = istart; icolor < nb_colors; icolor++){
+
+ nb_blocks_to_compute = mp->h_num_elem_colors_inner_core[icolor];
+
+ // checks
+ //if( nb_blocks_to_compute <= 0 ){
+ // printf("error number of elastic color blocks: %d -- color = %d \n",nb_blocks_to_compute,icolor);
+ // exit(EXIT_FAILURE);
+ //}
+
+ // launches kernel for this color's contiguous slice of the arrays
+ Kernel_2_inner_core(nb_blocks_to_compute,mp,
+ *iphase,
+ mp->d_ibool_inner_core + color_offset_nonpadded,
+ mp->d_idoubling_inner_core + color_offset_ispec,
+ mp->d_xix_inner_core + color_offset,
+ mp->d_xiy_inner_core + color_offset,
+ mp->d_xiz_inner_core + color_offset,
+ mp->d_etax_inner_core + color_offset,
+ mp->d_etay_inner_core + color_offset,
+ mp->d_etaz_inner_core + color_offset,
+ mp->d_gammax_inner_core + color_offset,
+ mp->d_gammay_inner_core + color_offset,
+ mp->d_gammaz_inner_core + color_offset,
+ mp->d_kappavstore_inner_core + color_offset,
+ mp->d_muvstore_inner_core + color_offset,
+ mp->d_epsilondev_xx_inner_core + color_offset_nonpadded,
+ mp->d_epsilondev_yy_inner_core + color_offset_nonpadded,
+ mp->d_epsilondev_xy_inner_core + color_offset_nonpadded,
+ mp->d_epsilondev_xz_inner_core + color_offset_nonpadded,
+ mp->d_epsilondev_yz_inner_core + color_offset_nonpadded,
+ mp->d_eps_trace_over_3_inner_core + color_offset_nonpadded,
+ mp->d_one_minus_sum_beta_inner_core + color_offset_nonpadded,
+ mp->d_factor_common_inner_core + color_offset_nonpadded_att2,
+ mp->d_R_xx_inner_core + color_offset_nonpadded,
+ mp->d_R_yy_inner_core + color_offset_nonpadded,
+ mp->d_R_xy_inner_core + color_offset_nonpadded,
+ mp->d_R_xz_inner_core + color_offset_nonpadded,
+ mp->d_R_yz_inner_core + color_offset_nonpadded,
+ mp->d_b_epsilondev_xx_inner_core + color_offset_nonpadded,
+ mp->d_b_epsilondev_yy_inner_core + color_offset_nonpadded,
+ mp->d_b_epsilondev_xy_inner_core + color_offset_nonpadded,
+ mp->d_b_epsilondev_xz_inner_core + color_offset_nonpadded,
+ mp->d_b_epsilondev_yz_inner_core + color_offset_nonpadded,
+ mp->d_b_eps_trace_over_3_inner_core + color_offset_nonpadded,
+ mp->d_b_R_xx_inner_core + color_offset_nonpadded,
+ mp->d_b_R_yy_inner_core + color_offset_nonpadded,
+ mp->d_b_R_xy_inner_core + color_offset_nonpadded,
+ mp->d_b_R_xz_inner_core + color_offset_nonpadded,
+ mp->d_b_R_yz_inner_core + color_offset_nonpadded,
+ mp->d_c11store_inner_core + color_offset,
+ mp->d_c12store_inner_core + color_offset,
+ mp->d_c13store_inner_core + color_offset,
+ mp->d_c33store_inner_core + color_offset,
+ mp->d_c44store_inner_core + color_offset
+ );
+
+ // advance offsets past this color's elements
+ // for padded and aligned arrays
+ color_offset += nb_blocks_to_compute * NGLL3_PADDED;
+ // for no-aligned arrays
+ color_offset_nonpadded += nb_blocks_to_compute * NGLL3;
+ // for factor_common array
+ color_offset_nonpadded_att2 += nb_blocks_to_compute * NGLL3 * N_SLS;
+ // for array(ispec)
+ color_offset_ispec += nb_blocks_to_compute;
+ }
+
+ }else{
+
+ // no mesh coloring: uses atomic updates
+
+ Kernel_2_inner_core(num_elements,mp,
+ *iphase,
+ mp->d_ibool_inner_core,
+ mp->d_idoubling_inner_core,
+ mp->d_xix_inner_core,mp->d_xiy_inner_core,mp->d_xiz_inner_core,
+ mp->d_etax_inner_core,mp->d_etay_inner_core,mp->d_etaz_inner_core,
+ mp->d_gammax_inner_core,mp->d_gammay_inner_core,mp->d_gammaz_inner_core,
+ mp->d_kappavstore_inner_core,
+ mp->d_muvstore_inner_core,
+ mp->d_epsilondev_xx_inner_core,
+ mp->d_epsilondev_yy_inner_core,
+ mp->d_epsilondev_xy_inner_core,
+ mp->d_epsilondev_xz_inner_core,
+ mp->d_epsilondev_yz_inner_core,
+ mp->d_eps_trace_over_3_inner_core,
+ mp->d_one_minus_sum_beta_inner_core,
+ mp->d_factor_common_inner_core,
+ mp->d_R_xx_inner_core,
+ mp->d_R_yy_inner_core,
+ mp->d_R_xy_inner_core,
+ mp->d_R_xz_inner_core,
+ mp->d_R_yz_inner_core,
+ mp->d_b_epsilondev_xx_inner_core,
+ mp->d_b_epsilondev_yy_inner_core,
+ mp->d_b_epsilondev_xy_inner_core,
+ mp->d_b_epsilondev_xz_inner_core,
+ mp->d_b_epsilondev_yz_inner_core,
+ mp->d_b_eps_trace_over_3_inner_core,
+ mp->d_b_R_xx_inner_core,
+ mp->d_b_R_yy_inner_core,
+ mp->d_b_R_xy_inner_core,
+ mp->d_b_R_xz_inner_core,
+ mp->d_b_R_yz_inner_core,
+ mp->d_c11store_inner_core,mp->d_c12store_inner_core,mp->d_c13store_inner_core,
+ mp->d_c33store_inner_core,mp->d_c44store_inner_core
+ );
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ //double end_time = get_time();
+ //printf("Elapsed time: %e\n",end_time-start_time);
+ exit_on_cuda_error("compute_forces_inner_core_cuda");
+#endif
+}
+
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_outer_core_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_outer_core_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_outer_core_cuda.cu 2012-02-14 15:11:07 UTC (rev 19622)
@@ -0,0 +1,673 @@
+/*
+ !=====================================================================
+ !
+ ! S p e c f e m 3 D V e r s i o n 2 . 0
+ ! ---------------------------------------
+ !
+ ! Main authors: Dimitri Komatitsch and Jeroen Tromp
+ ! Princeton University, USA and University of Pau / CNRS / INRIA
+ ! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+ ! April 2011
+ !
+ ! This program is free software; you can redistribute it and/or modify
+ ! it under the terms of the GNU General Public License as published by
+ ! the Free Software Foundation; either version 2 of the License, or
+ ! (at your option) any later version.
+ !
+ ! This program is distributed in the hope that it will be useful,
+ ! but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ ! GNU General Public License for more details.
+ !
+ ! You should have received a copy of the GNU General Public License along
+ ! with this program; if not, write to the Free Software Foundation, Inc.,
+ ! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ !
+ !=====================================================================
+ */
+
+#include <stdio.h>
+
+#include <cuda.h>
+#include <cublas.h>
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// elemental routines
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// fluid rotation
+
+// Computes the rotation (Coriolis) contribution to the fluid potential
+// gradient at one GLL point of one outer-core element, and advances the
+// rotation memory variables A/B with an explicit Euler step.
+//
+// tx               : local GLL point index of this thread (0..NGLL3-1)
+// working_element  : element index into the outer-core arrays
+// time             : current simulation time
+// two_omega_earth  : 2*Omega factor of the Coriolis term
+// deltat           : time step used by the Euler scheme
+// d_A_array_rotation,
+// d_B_array_rotation : rotation memory variables, UPDATED IN PLACE below
+//                      (they are read before being incremented; keep order)
+// dpotentialdxl,dpotentialdyl : x/y gradient of the potential (the rotation
+//                               term has no z component)
+// dpotentialdx_with_rot,
+// dpotentialdy_with_rot : outputs, gradient including the rotation term
+__device__ void compute_element_oc_rotation(int tx,int working_element,
+ realw time,
+ realw two_omega_earth,
+ realw deltat,
+ realw* d_A_array_rotation,
+ realw* d_B_array_rotation,
+ reald dpotentialdxl, reald dpotentialdyl,
+ reald* dpotentialdx_with_rot,
+ reald* dpotentialdy_with_rot) {
+
+ reald two_omega_deltat,cos_two_omega_t,sin_two_omega_t;
+ reald A_rotation,B_rotation;
+ reald ux_rotation,uy_rotation;
+ reald source_euler_A,source_euler_B;
+
+ // non-padded offset
+ int offset_nonpadded = tx + working_element*NGLL3;
+
+ // store the source for the Euler scheme for A_rotation and B_rotation
+ two_omega_deltat = deltat * two_omega_earth;
+
+ cos_two_omega_t = cos(two_omega_earth*time);
+ sin_two_omega_t = sin(two_omega_earth*time);
+
+ // time step deltat of Euler scheme is included in the source
+ source_euler_A = two_omega_deltat * (cos_two_omega_t * dpotentialdyl + sin_two_omega_t * dpotentialdxl);
+ source_euler_B = two_omega_deltat * (sin_two_omega_t * dpotentialdyl - cos_two_omega_t * dpotentialdxl);
+
+ // current rotation memory variables (read BEFORE the in-place update below)
+ A_rotation = d_A_array_rotation[offset_nonpadded];
+ B_rotation = d_B_array_rotation[offset_nonpadded];
+
+ ux_rotation = A_rotation*cos_two_omega_t + B_rotation*sin_two_omega_t;
+ uy_rotation = - A_rotation*sin_two_omega_t + B_rotation*cos_two_omega_t;
+
+ *dpotentialdx_with_rot = dpotentialdxl + ux_rotation;
+ *dpotentialdy_with_rot = dpotentialdyl + uy_rotation;
+
+ // updates rotation term with Euler scheme
+ d_A_array_rotation[offset_nonpadded] += source_euler_A;
+ d_B_array_rotation[offset_nonpadded] += source_euler_B;
+
+ return;
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// KERNEL 2
+//
+// for outer core ( acoustic domain )
+/* ----------------------------------------------------------------------------------------------- */
+
+
+// Acoustic (fluid) outer-core element kernel ("Kernel 2").
+//
+// One thread block per spectral element: threads 0..NGLL3-1 each handle one
+// GLL point; the block is padded to NGLL3_PADDED threads, so the last few
+// threads stay inactive (see 'active' below).  Computes the stiffness
+// contribution to d_potential_dot_dot from d_potential, with optional
+// rotation (Coriolis) and gravity terms.  Global updates use atomicAdd
+// unless mesh coloring guarantees no two elements of a launch share a point.
+__global__ void Kernel_2_outer_core_impl(int nb_blocks_to_compute,
+ int NGLOB, int* d_ibool,
+ int* d_phase_ispec_inner,
+ int num_phase_ispec,
+ int d_iphase,
+ int use_mesh_coloring_gpu,
+ realw* d_potential, realw* d_potential_dot_dot,
+ realw* d_xix, realw* d_xiy, realw* d_xiz,
+ realw* d_etax, realw* d_etay, realw* d_etaz,
+ realw* d_gammax, realw* d_gammay, realw* d_gammaz,
+ realw* hprime_xx, realw* hprime_yy, realw* hprime_zz,
+ realw* hprimewgll_xx, realw* hprimewgll_yy, realw* hprimewgll_zz,
+ realw* wgllwgll_xy,realw* wgllwgll_xz,realw* wgllwgll_yz,
+ int GRAVITY,
+ realw* d_xstore, realw* d_ystore, realw* d_zstore,
+ realw* d_d_ln_density_dr_table,
+ realw* d_minus_rho_g_over_kappa_fluid,
+ realw* wgll_cube,
+ int ROTATION,
+ realw time,
+ realw two_omega_earth,
+ realw deltat,
+ realw* d_A_array_rotation,realw* d_B_array_rotation){
+
+ // block index folded from the 2D grid, thread index = local GLL point
+ int bx = blockIdx.y*gridDim.x+blockIdx.x;
+ int tx = threadIdx.x;
+
+ //const int NGLL3 = NGLL3;
+ const int NGLL3_ALIGN = NGLL3_PADDED;
+ // R_EARTH_KM is the radius of the bottom of the oceans (radius of Earth in km)
+ const reald R_EARTH_KM = 6371.0f;
+ // uncomment line below for PREM with oceans
+ //const reald R_EARTH_KM = 6368.0f;
+
+ // local (I,J,K) indices of this thread's GLL point within the element
+ int K = (tx/NGLL2);
+ int J = ((tx-K*NGLL2)/NGLLX);
+ int I = (tx-K*NGLL2-J*NGLLX);
+
+ int active,offset;
+ int iglob = 0;
+ int working_element;
+ reald temp1l,temp2l,temp3l;
+ reald xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl;
+ reald dpotentialdxl,dpotentialdyl,dpotentialdzl;
+ reald dpotentialdx_with_rot,dpotentialdy_with_rot;
+ reald fac1,fac2,fac3;
+ reald sum_terms;
+ reald gravity_term;
+ reald gxl,gyl,gzl;
+ reald radius,theta,phi;
+ reald cos_theta,sin_theta,cos_phi,sin_phi;
+ reald grad_x_ln_rho,grad_y_ln_rho,grad_z_ln_rho;
+ int int_radius;
+
+
+#ifndef MANUALLY_UNROLLED_LOOPS
+ int l;
+ int offset1,offset2,offset3;
+ realw hp1,hp2,hp3;
+#endif
+
+ // per-element working arrays shared by the whole thread block
+ __shared__ reald s_dummy_loc[NGLL3];
+
+ __shared__ reald s_temp1[NGLL3];
+ __shared__ reald s_temp2[NGLL3];
+ __shared__ reald s_temp3[NGLL3];
+
+// use only NGLL^3 = 125 active threads, plus 3 inactive/ghost threads,
+// because we used memory padding from NGLL^3 = 125 to 128 to get coalescent memory accesses
+ active = (tx < NGLL3 && bx < nb_blocks_to_compute) ? 1:0;
+
+// copy from global memory to shared memory
+// each thread writes one of the NGLL^3 = 125 data points
+ if (active) {
+
+#ifdef USE_MESH_COLORING_GPU
+ working_element = bx;
+#else
+ //mesh coloring
+ if( use_mesh_coloring_gpu ){
+ working_element = bx;
+ }else{
+ // iphase-1 and working_element-1 for Fortran->C array conventions
+ working_element = d_phase_ispec_inner[bx + num_phase_ispec*(d_iphase-1)]-1;
+ }
+#endif
+
+ // iglob = d_ibool[working_element*NGLL3_ALIGN + tx]-1;
+ iglob = d_ibool[working_element*NGLL3 + tx]-1;
+
+#ifdef USE_TEXTURES
+ s_dummy_loc[tx] = tex1Dfetch(tex_potential, iglob);
+#else
+ // changing iglob indexing to match fortran row changes fast style
+ s_dummy_loc[tx] = d_potential[iglob];
+#endif
+ }
+
+// synchronize all the threads (one thread for each of the NGLL grid points of the
+// current spectral element) because we need the whole element to be ready in order
+// to be able to compute the matrix products along cut planes of the 3D element below
+ __syncthreads();
+
+#ifndef MAKE_KERNEL2_BECOME_STUPID_FOR_TESTS
+
+ if (active) {
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+// if(iglob == 0 )printf("kernel 2: iglob %i hprime_xx %f %f %f \n",iglob,hprime_xx[0],hprime_xx[1],hprime_xx[2]);
+#endif
+
+
+#ifndef MANUALLY_UNROLLED_LOOPS
+
+ // 1D GLL derivative sums of the potential along xi, eta, gamma
+ temp1l = 0.f;
+ temp2l = 0.f;
+ temp3l = 0.f;
+
+ for (l=0;l<NGLLX;l++) {
+ hp1 = hprime_xx[l*NGLLX+I];
+ offset1 = K*NGLL2+J*NGLLX+l;
+ temp1l += s_dummy_loc[offset1]*hp1;
+
+ //no more assumes that hprime_xx = hprime_yy = hprime_zz
+ hp2 = hprime_yy[l*NGLLX+J];
+ offset2 = K*NGLL2+l*NGLLX+I;
+ temp2l += s_dummy_loc[offset2]*hp2;
+
+ hp3 = hprime_zz[l*NGLLX+K];
+ offset3 = l*NGLL2+J*NGLLX+I;
+ temp3l += s_dummy_loc[offset3]*hp3;
+ }
+#else
+
+ temp1l = s_dummy_loc[K*NGLL2+J*NGLLX]*hprime_xx[I]
+ + s_dummy_loc[K*NGLL2+J*NGLLX+1]*hprime_xx[NGLLX+I]
+ + s_dummy_loc[K*NGLL2+J*NGLLX+2]*hprime_xx[2*NGLLX+I]
+ + s_dummy_loc[K*NGLL2+J*NGLLX+3]*hprime_xx[3*NGLLX+I]
+ + s_dummy_loc[K*NGLL2+J*NGLLX+4]*hprime_xx[4*NGLLX+I];
+
+ temp2l = s_dummy_loc[K*NGLL2+I]*hprime_yy[J]
+ + s_dummy_loc[K*NGLL2+NGLLX+I]*hprime_yy[NGLLX+J]
+ + s_dummy_loc[K*NGLL2+2*NGLLX+I]*hprime_yy[2*NGLLX+J]
+ + s_dummy_loc[K*NGLL2+3*NGLLX+I]*hprime_yy[3*NGLLX+J]
+ + s_dummy_loc[K*NGLL2+4*NGLLX+I]*hprime_yy[4*NGLLX+J];
+
+ temp3l = s_dummy_loc[J*NGLLX+I]*hprime_zz[K]
+ + s_dummy_loc[NGLL2+J*NGLLX+I]*hprime_zz[NGLLX+K]
+ + s_dummy_loc[2*NGLL2+J*NGLLX+I]*hprime_zz[2*NGLLX+K]
+ + s_dummy_loc[3*NGLL2+J*NGLLX+I]*hprime_zz[3*NGLLX+K]
+ + s_dummy_loc[4*NGLL2+J*NGLLX+I]*hprime_zz[4*NGLLX+K];
+
+#endif
+
+ // compute derivatives of ux, uy and uz with respect to x, y and z
+ // note: mapping arrays use the PADDED element stride
+ offset = working_element*NGLL3_ALIGN + tx;
+
+ xixl = d_xix[offset];
+ xiyl = d_xiy[offset];
+ xizl = d_xiz[offset];
+ etaxl = d_etax[offset];
+ etayl = d_etay[offset];
+ etazl = d_etaz[offset];
+ gammaxl = d_gammax[offset];
+ gammayl = d_gammay[offset];
+ gammazl = d_gammaz[offset];
+
+ // compute the jacobian
+ jacobianl = 1.f / (xixl*(etayl*gammazl-etazl*gammayl)
+ -xiyl*(etaxl*gammazl-etazl*gammaxl)
+ +xizl*(etaxl*gammayl-etayl*gammaxl));
+
+ // derivatives of potential
+ dpotentialdxl = xixl*temp1l + etaxl*temp2l + gammaxl*temp3l;
+ dpotentialdyl = xiyl*temp1l + etayl*temp2l + gammayl*temp3l;
+ dpotentialdzl = xizl*temp1l + etazl*temp2l + gammazl*temp3l;
+
+ // compute contribution of rotation and add to gradient of potential
+ // this term has no Z component
+ if(ROTATION){
+ compute_element_oc_rotation(tx,working_element,time,two_omega_earth,deltat,
+ d_A_array_rotation,d_B_array_rotation,
+ dpotentialdxl,dpotentialdyl,
+ &dpotentialdx_with_rot,&dpotentialdy_with_rot);
+
+ }else{
+ dpotentialdx_with_rot = dpotentialdxl;
+ dpotentialdy_with_rot = dpotentialdyl;
+ }
+
+ // pre-computes gravity terms
+
+ // use mesh coordinates to get theta and phi
+ // x y z contain r theta phi
+ radius = d_xstore[iglob];
+ theta = d_ystore[iglob];
+ phi = d_zstore[iglob];
+
+ cos_theta = cos(theta);
+ sin_theta = sin(theta);
+ cos_phi = cos(phi);
+ sin_phi = sin(phi);
+
+ // for efficiency replace with lookup table every 100 m in radial direction
+ // note: radius in outer core should never be zero,
+ // and arrays in C start from 0, thus we need to subtract -1
+ int_radius = rint(radius * R_EARTH_KM * 10.0f ) - 1;
+
+ // depending on gravity or not, different potential definitions are used
+ if( ! GRAVITY ){
+ // add (chi/rho)grad(rho) term in no gravity case
+
+ // grad(rho)/rho in Cartesian components
+ grad_x_ln_rho = sin_theta * cos_phi * d_d_ln_density_dr_table[int_radius];
+ grad_y_ln_rho = sin_theta * sin_phi * d_d_ln_density_dr_table[int_radius];
+ grad_z_ln_rho = cos_theta * d_d_ln_density_dr_table[int_radius];
+
+ // adding (chi/rho)grad(rho)
+ dpotentialdx_with_rot = dpotentialdx_with_rot + s_dummy_loc[tx] * grad_x_ln_rho;
+ dpotentialdy_with_rot = dpotentialdy_with_rot + s_dummy_loc[tx] * grad_y_ln_rho;
+ dpotentialdzl = dpotentialdzl + s_dummy_loc[tx] * grad_z_ln_rho;
+
+ }else{
+
+ // compute divergence of displacement
+ // precompute and store gravity term
+ //
+ // get g, rho and dg/dr=dg
+ // spherical components of the gravitational acceleration
+ //
+ // Cartesian components of the gravitational acceleration
+ // integrate and multiply by rho / Kappa
+ gxl = sin_theta*cos_phi;
+ gyl = sin_theta*sin_phi;
+ gzl = cos_theta;
+
+ // uses potential definition: s = grad(chi)
+ // gravity term: - rho * g * 1/kappa grad(chi)
+
+ // note: gravity_term is only read below when GRAVITY is set, so it is
+ // intentionally left uninitialized on the other branch
+ gravity_term = d_minus_rho_g_over_kappa_fluid[int_radius] * jacobianl * wgll_cube[tx] *
+ ( dpotentialdx_with_rot * gxl + dpotentialdy_with_rot * gyl + dpotentialdzl * gzl);
+
+ // divergence of displacement field with gravity on
+ // note: these calculations are only considered for SIMULATION_TYPE == 1 .and. SAVE_FORWARD
+ // and one has set MOVIE_VOLUME_TYPE == 4 when MOVIE_VOLUME is .true.;
+ // in case of SIMULATION_TYPE == 3, it gets overwritten by compute_kernels_outer_core()
+ //if (NSPEC_OUTER_CORE_ADJOINT /= 1 && MOVIE_VOLUME ){
+ // div_displfluid(i,j,k,ispec) = d_minus_rho_g_over_kappa_fluid[int_radius] *
+ // (dpotentialdx_with_rot * gxl + dpotentialdy_with_rot * gyl + dpotentialdzl * gzl);
+ //}
+
+ }
+
+ // form the dot product with the test vector
+ s_temp1[tx] = jacobianl*(xixl*dpotentialdx_with_rot + xiyl*dpotentialdy_with_rot + xizl*dpotentialdzl);
+ s_temp2[tx] = jacobianl*(etaxl*dpotentialdx_with_rot + etayl*dpotentialdy_with_rot + etazl*dpotentialdzl);
+ s_temp3[tx] = jacobianl*(gammaxl*dpotentialdx_with_rot + gammayl*dpotentialdy_with_rot + gammazl*dpotentialdzl);
+ }
+
+// synchronize all the threads (one thread for each of the NGLL grid points of the
+// current spectral element) because we need the whole element to be ready in order
+// to be able to compute the matrix products along cut planes of the 3D element below
+ __syncthreads();
+
+ if (active) {
+
+#ifndef MANUALLY_UNROLLED_LOOPS
+
+ // second round of 1D sums, now weighted with the GLL quadrature weights
+ temp1l = 0.f;
+ temp2l = 0.f;
+ temp3l = 0.f;
+
+ for (l=0;l<NGLLX;l++) {
+ fac1 = hprimewgll_xx[I*NGLLX+l];
+ offset1 = K*NGLL2+J*NGLLX+l;
+ temp1l += s_temp1[offset1]*fac1;
+
+ //no more assumes hprimewgll_xx = hprimewgll_yy = hprimewgll_zz
+ fac2 = hprimewgll_yy[J*NGLLX+l];
+ offset2 = K*NGLL2+l*NGLLX+I;
+ temp2l += s_temp2[offset2]*fac2;
+
+ fac3 = hprimewgll_zz[K*NGLLX+l];
+ offset3 = l*NGLL2+J*NGLLX+I;
+ temp3l += s_temp3[offset3]*fac3;
+ }
+#else
+
+ temp1l = s_temp1[K*NGLL2+J*NGLLX]*hprimewgll_xx[I*NGLLX]
+ + s_temp1[K*NGLL2+J*NGLLX+1]*hprimewgll_xx[I*NGLLX+1]
+ + s_temp1[K*NGLL2+J*NGLLX+2]*hprimewgll_xx[I*NGLLX+2]
+ + s_temp1[K*NGLL2+J*NGLLX+3]*hprimewgll_xx[I*NGLLX+3]
+ + s_temp1[K*NGLL2+J*NGLLX+4]*hprimewgll_xx[I*NGLLX+4];
+
+
+ temp2l = s_temp2[K*NGLL2+I]*hprimewgll_yy[J*NGLLX]
+ + s_temp2[K*NGLL2+NGLLX+I]*hprimewgll_yy[J*NGLLX+1]
+ + s_temp2[K*NGLL2+2*NGLLX+I]*hprimewgll_yy[J*NGLLX+2]
+ + s_temp2[K*NGLL2+3*NGLLX+I]*hprimewgll_yy[J*NGLLX+3]
+ + s_temp2[K*NGLL2+4*NGLLX+I]*hprimewgll_yy[J*NGLLX+4];
+
+
+ temp3l = s_temp3[J*NGLLX+I]*hprimewgll_zz[K*NGLLX]
+ + s_temp3[NGLL2+J*NGLLX+I]*hprimewgll_zz[K*NGLLX+1]
+ + s_temp3[2*NGLL2+J*NGLLX+I]*hprimewgll_zz[K*NGLLX+2]
+ + s_temp3[3*NGLL2+J*NGLLX+I]*hprimewgll_zz[K*NGLLX+3]
+ + s_temp3[4*NGLL2+J*NGLLX+I]*hprimewgll_zz[K*NGLLX+4];
+
+
+#endif
+
+ fac1 = wgllwgll_yz[K*NGLLX+J];
+ fac2 = wgllwgll_xz[K*NGLLX+I];
+ fac3 = wgllwgll_xy[J*NGLLX+I];
+
+ sum_terms = -(fac1*temp1l + fac2*temp2l + fac3*temp3l);
+ if( GRAVITY ) sum_terms += gravity_term;
+
+ iglob = d_ibool[working_element*NGLL3 + tx]-1;
+
+#ifdef USE_TEXTURES
+ d_potential_dot_dot[iglob] = tex1Dfetch(tex_potential_dot_dot, iglob)
+ + sum_terms;
+#else
+
+#ifdef USE_MESH_COLORING_GPU
+ // no atomic operation needed, colors don't share global points between elements
+ d_potential_dot_dot[iglob] += sum_terms;
+#else
+ //mesh coloring
+ if( use_mesh_coloring_gpu ){
+
+ // no atomic operation needed, colors don't share global points between elements
+ d_potential_dot_dot[iglob] += sum_terms;
+
+ }else{
+
+ // elements may share global points within this launch, so use atomics
+ atomicAdd(&d_potential_dot_dot[iglob],sum_terms);
+
+ }
+#endif
+
+#endif
+ }
+
+#else // of #ifndef MAKE_KERNEL2_BECOME_STUPID_FOR_TESTS
+ d_potential_dot_dot[iglob] = 123.123f;
+#endif // of #ifndef MAKE_KERNEL2_BECOME_STUPID_FOR_TESTS
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Host-side launcher for Kernel_2_outer_core_impl.
+//
+// Launches the kernel once for the forward wavefield and, for adjoint/kernel
+// simulations (simulation_type == 3), a second time for the backward field
+// (b_* arrays, b_time).  The grid is folded into two dimensions whenever the
+// number of elements exceeds the 65535 blocks-per-dimension hardware limit.
+void Kernel_2_outer_core(int nb_blocks_to_compute, Mesh* mp,
+ int d_iphase,
+ int* d_ibool,
+ realw* d_xix,realw* d_xiy,realw* d_xiz,
+ realw* d_etax,realw* d_etay,realw* d_etaz,
+ realw* d_gammax,realw* d_gammay,realw* d_gammaz,
+ realw time, realw b_time,
+ realw* d_A_array_rotation,realw* d_B_array_rotation,
+ realw* d_b_A_array_rotation,realw* d_b_B_array_rotation
+ ){
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("before outer_core kernel Kernel_2");
+#endif
+
+ /* if the grid can handle the number of blocks, we let it be 1D */
+ /* grid_2_x = nb_elem_color; */
+ /* nb_elem_color is just how many blocks we are computing now */
+
+ // fold the 1D block count into a 2D grid below the 65535 limit
+ int num_blocks_x = nb_blocks_to_compute;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ // one thread per (padded) GLL point of an element
+ int threads_2 = NGLL3_PADDED;//BLOCK_SIZE_K2;
+ dim3 grid_2(num_blocks_x,num_blocks_y);
+
+ // Cuda timing
+ // cudaEvent_t start, stop;
+ // realw time;
+ // cudaEventCreate(&start);
+ // cudaEventCreate(&stop);
+ // cudaEventRecord( start, 0 );
+
+ // forward wavefield
+ Kernel_2_outer_core_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,
+ mp->NGLOB_OUTER_CORE,
+ d_ibool,
+ mp->d_phase_ispec_inner_outer_core,
+ mp->num_phase_ispec_outer_core,
+ d_iphase,
+ mp->use_mesh_coloring_gpu,
+ mp->d_displ_outer_core,
+ mp->d_accel_outer_core,
+ d_xix, d_xiy, d_xiz,
+ d_etax, d_etay, d_etaz,
+ d_gammax, d_gammay, d_gammaz,
+ mp->d_hprime_xx, mp->d_hprime_yy, mp->d_hprime_zz,
+ mp->d_hprimewgll_xx, mp->d_hprimewgll_yy, mp->d_hprimewgll_zz,
+ mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
+ mp->gravity,
+ mp->d_xstore_outer_core,mp->d_ystore_outer_core,mp->d_zstore_outer_core,
+ mp->d_d_ln_density_dr_table,
+ mp->d_minus_rho_g_over_kappa_fluid,
+ mp->d_wgll_cube,
+ mp->rotation,
+ time,
+ mp->d_two_omega_earth,
+ mp->d_deltat,
+ d_A_array_rotation,d_B_array_rotation);
+
+ // backward/reconstructed wavefield for adjoint simulations
+ if(mp->simulation_type == 3) {
+ Kernel_2_outer_core_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,
+ mp->NGLOB_OUTER_CORE,
+ d_ibool,
+ mp->d_phase_ispec_inner_outer_core,
+ mp->num_phase_ispec_outer_core,
+ d_iphase,
+ mp->use_mesh_coloring_gpu,
+ mp->d_b_displ_outer_core,
+ mp->d_b_accel_outer_core,
+ d_xix, d_xiy, d_xiz,
+ d_etax, d_etay, d_etaz,
+ d_gammax, d_gammay, d_gammaz,
+ mp->d_hprime_xx, mp->d_hprime_yy, mp->d_hprime_zz,
+ mp->d_hprimewgll_xx, mp->d_hprimewgll_yy, mp->d_hprimewgll_zz,
+ mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
+ mp->gravity,
+ mp->d_xstore_outer_core,mp->d_ystore_outer_core,mp->d_zstore_outer_core,
+ mp->d_d_ln_density_dr_table,
+ mp->d_minus_rho_g_over_kappa_fluid,
+ mp->d_wgll_cube,
+ mp->rotation,
+ b_time,
+ mp->d_b_two_omega_earth,
+ mp->d_b_deltat,
+ d_b_A_array_rotation,d_b_B_array_rotation);
+ }
+
+ // cudaEventRecord( stop, 0 );
+ // cudaEventSynchronize( stop );
+ // cudaEventElapsedTime( &time, start, stop );
+ // cudaEventDestroy( start );
+ // cudaEventDestroy( stop );
+ // printf("Kernel2 Execution Time: %f ms\n",time);
+
+ /* cudaThreadSynchronize(); */
+ /* TRACE("Kernel 2 finished"); */
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ //printf("Tried to start with %dx1 blocks\n",nb_blocks_to_compute);
+ exit_on_cuda_error("kernel Kernel_2_outer_core");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// main compute_forces_outer_core CUDA routine
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Fortran-callable entry point: computes the fluid (acoustic) potential
+// update in the outer core for the current phase.
+//
+// iphase == 1 processes the outer (MPI-boundary) elements, otherwise the
+// inner elements.  With mesh coloring enabled, elements are processed one
+// color at a time so the kernel can update shared global points without
+// atomics; the color loop advances padded and non-padded array offsets
+// in step (order of the updates at the end of the loop matters).
+extern "C"
+void FC_FUNC_(compute_forces_outer_core_cuda,
+ COMPUTE_FORCES_OUTER_CORE_CUDA)(long* Mesh_pointer_f,
+ int* iphase,
+ realw* time_f,
+ realw* b_time_f) {
+
+ TRACE("compute_forces_outer_core_cuda");
+
+//daniel: debug
+ //printf("Running compute_forces_outer_core_cuda\n");
+ //double start_time = get_time();
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
+
+ int num_elements;
+ realw time = *time_f;
+ realw b_time = *b_time_f;
+
+ if( *iphase == 1 )
+ num_elements = mp->nspec_outer_outer_core;
+ else
+ num_elements = mp->nspec_inner_outer_core;
+
+ // nothing to do for this phase on this rank
+ if( num_elements == 0 ) return;
+
+ // mesh coloring
+ if( mp->use_mesh_coloring_gpu ){
+
+ // note: array offsets require sorted arrays, such that e.g. ibool starts with elastic elements
+ // and followed by acoustic ones.
+ // acoustic elements also start with outer than inner element ordering
+
+ int nb_colors,nb_blocks_to_compute;
+ int istart;
+ int color_offset,color_offset_nonpadded;
+
+ // sets up color loop
+ if( *iphase == 1 ){
+ // outer elements
+ nb_colors = mp->num_colors_outer_outer_core;
+ istart = 0;
+
+ // array offsets (acoustic elements start after elastic ones)
+ color_offset = mp->nspec_elastic * NGLL3_PADDED;
+ color_offset_nonpadded = mp->nspec_elastic * NGLL3;
+ }else{
+ // inner element colors (start after outer elements)
+ nb_colors = mp->num_colors_outer_outer_core + mp->num_colors_inner_outer_core;
+ istart = mp->num_colors_outer_outer_core;
+
+ // array offsets (inner elements start after outer ones)
+ color_offset = mp->nspec_outer_outer_core * NGLL3_PADDED;
+ color_offset_nonpadded = mp->nspec_outer_outer_core * NGLL3;
+ }
+
+ // loops over colors
+ for(int icolor = istart; icolor < nb_colors; icolor++){
+
+ // number of elements of this color = number of blocks to launch
+ nb_blocks_to_compute = mp->h_num_elem_colors_outer_core[icolor];
+
+ Kernel_2_outer_core(nb_blocks_to_compute,mp,
+ *iphase,
+ mp->d_ibool_outer_core + color_offset_nonpadded,
+ mp->d_xix_outer_core + color_offset,
+ mp->d_xiy_outer_core + color_offset,
+ mp->d_xiz_outer_core + color_offset,
+ mp->d_etax_outer_core + color_offset,
+ mp->d_etay_outer_core + color_offset,
+ mp->d_etaz_outer_core + color_offset,
+ mp->d_gammax_outer_core + color_offset,
+ mp->d_gammay_outer_core + color_offset,
+ mp->d_gammaz_outer_core + color_offset,
+ time,b_time,
+ mp->d_A_array_rotation + color_offset_nonpadded,
+ mp->d_B_array_rotation + color_offset_nonpadded,
+ mp->d_b_A_array_rotation + color_offset_nonpadded,
+ mp->d_b_B_array_rotation + color_offset_nonpadded
+ );
+
+ // for padded and aligned arrays
+ color_offset += nb_blocks_to_compute * NGLL3_PADDED;
+ // for no-aligned arrays
+ color_offset_nonpadded += nb_blocks_to_compute * NGLL3;
+ }
+
+ }else{
+
+ // no mesh coloring: uses atomic updates
+ Kernel_2_outer_core(num_elements, mp,
+ *iphase,
+ mp->d_ibool_outer_core,
+ mp->d_xix_outer_core,mp->d_xiy_outer_core,mp->d_xiz_outer_core,
+ mp->d_etax_outer_core,mp->d_etay_outer_core,mp->d_etaz_outer_core,
+ mp->d_gammax_outer_core,mp->d_gammay_outer_core,mp->d_gammaz_outer_core,
+ time,b_time,
+ mp->d_A_array_rotation,mp->d_B_array_rotation,
+ mp->d_b_A_array_rotation,mp->d_b_B_array_rotation
+ );
+
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ //double end_time = get_time();
+ //printf("Elapsed time: %e\n",end_time-start_time);
+ exit_on_cuda_error("compute_forces_outer_core_cuda");
+#endif
+}
+
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_kernels_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_kernels_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_kernels_cuda.cu 2012-02-14 15:11:07 UTC (rev 19622)
@@ -0,0 +1,646 @@
+/*
+ !=====================================================================
+ !
+ ! S p e c f e m 3 D V e r s i o n 2 . 0
+ ! ---------------------------------------
+ !
+ ! Main authors: Dimitri Komatitsch and Jeroen Tromp
+ ! Princeton University, USA and University of Pau / CNRS / INRIA
+ ! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+ ! April 2011
+ !
+ ! This program is free software; you can redistribute it and/or modify
+ ! it under the terms of the GNU General Public License as published by
+ ! the Free Software Foundation; either version 2 of the License, or
+ ! (at your option) any later version.
+ !
+ ! This program is distributed in the hope that it will be useful,
+ ! but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ ! GNU General Public License for more details.
+ !
+ ! You should have received a copy of the GNU General Public License along
+ ! with this program; if not, write to the Free Software Foundation, Inc.,
+ ! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ !
+ !=====================================================================
+ */
+
+#include <stdio.h>
+#include <cuda.h>
+#include <cublas.h>
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// ELASTIC SIMULATIONS
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Accumulates the isotropic elastic sensitivity kernels (rho, mu, kappa)
+// for one time step.  One block per spectral element, one thread per GLL
+// point (NGLL3 threads).  Each contribution is the time integral (factor
+// deltat) of products between the forward and backward wavefields.
+__global__ void compute_kernels_cudakernel(int* ispec_is_elastic,
+ int* ibool,
+ realw* accel,
+ realw* b_displ,
+ realw* epsilondev_xx,
+ realw* epsilondev_yy,
+ realw* epsilondev_xy,
+ realw* epsilondev_xz,
+ realw* epsilondev_yz,
+ realw* b_epsilondev_xx,
+ realw* b_epsilondev_yy,
+ realw* b_epsilondev_xy,
+ realw* b_epsilondev_xz,
+ realw* b_epsilondev_yz,
+ realw* rho_kl,
+ realw deltat,
+ realw* mu_kl,
+ realw* kappa_kl,
+ realw* epsilon_trace_over_3,
+ realw* b_epsilon_trace_over_3,
+ int NSPEC_AB) {
+
+ // element handled by this block (2D grid folded back to 1D)
+ int ispec = blockIdx.x + blockIdx.y*gridDim.x;
+
+ // skip the possible extra block of a rectangular grid
+ if( ispec >= NSPEC_AB ) return;
+
+ // these isotropic kernels are accumulated on elastic elements only
+ if( ! ispec_is_elastic[ispec] ) return;
+
+ int tx = threadIdx.x; // local GLL point of this thread
+ int pnt = tx + NGLL3*ispec; // per-point offset into element arrays
+ int iglob = ibool[pnt] - 1; // Fortran -> C index shift
+
+ // density kernel: dot product of acceleration and backward displacement
+ realw rho_term = accel[3*iglob ]*b_displ[3*iglob ]
+ + accel[3*iglob+1]*b_displ[3*iglob+1]
+ + accel[3*iglob+2]*b_displ[3*iglob+2];
+ rho_kl[pnt] += deltat * rho_term;
+
+ // shear modulus kernel: contraction of the deviatoric strains of the
+ // forward and backward fields
+ realw mu_term = epsilondev_xx[pnt]*b_epsilondev_xx[pnt]
+ + epsilondev_yy[pnt]*b_epsilondev_yy[pnt]
+ + (epsilondev_xx[pnt]+epsilondev_yy[pnt])*
+ (b_epsilondev_xx[pnt]+b_epsilondev_yy[pnt])
+ + 2*(epsilondev_xy[pnt]*b_epsilondev_xy[pnt]
+ + epsilondev_xz[pnt]*b_epsilondev_xz[pnt]
+ + epsilondev_yz[pnt]*b_epsilondev_yz[pnt]);
+ mu_kl[pnt] += deltat * mu_term;
+
+ // bulk modulus kernel: product of the strain trace parts of both fields
+ kappa_kl[pnt] += deltat*(9*epsilon_trace_over_3[pnt]*
+ b_epsilon_trace_over_3[pnt]);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Fortran-callable wrapper: launches the elastic isotropic kernel
+// accumulation over all spectral elements for the current time step.
+extern "C"
+void FC_FUNC_(compute_kernels_elastic_cuda,
+ COMPUTE_KERNELS_ELASTIC_CUDA)(long* Mesh_pointer,
+ realw* deltat_f) {
+
+ TRACE("compute_kernels_elastic_cuda");
+
+ // recover the Mesh struct hidden inside the Fortran integer wrapper
+ Mesh* mp = (Mesh*)(*Mesh_pointer);
+
+ realw dt = *deltat_f;
+
+ // one block of NGLL3 threads per element; fold the block count into a
+ // 2D grid whenever it exceeds the 65535 blocks-per-dimension limit
+ int nbx = mp->NSPEC_AB;
+ int nby = 1;
+ while(nbx > 65535) {
+ nbx = (int) ceil(nbx*0.5f);
+ nby = nby*2;
+ }
+
+ dim3 grid(nbx,nby);
+ dim3 threads(NGLL3,1,1);
+
+ compute_kernels_cudakernel<<<grid,threads>>>(mp->d_ispec_is_elastic, mp->d_ibool,
+ mp->d_accel, mp->d_b_displ,
+ mp->d_epsilondev_xx, mp->d_epsilondev_yy,
+ mp->d_epsilondev_xy, mp->d_epsilondev_xz,
+ mp->d_epsilondev_yz,
+ mp->d_b_epsilondev_xx, mp->d_b_epsilondev_yy,
+ mp->d_b_epsilondev_xy, mp->d_b_epsilondev_xz,
+ mp->d_b_epsilondev_yz,
+ mp->d_rho_kl,
+ dt,
+ mp->d_mu_kl,
+ mp->d_kappa_kl,
+ mp->d_epsilon_trace_over_3,
+ mp->d_b_epsilon_trace_over_3,
+ mp->NSPEC_AB);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("compute_kernels_elastic_cuda");
+#endif
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// NOISE SIMULATIONS
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+// Accumulates the noise strength kernel Sigma_kl on the free surface.
+//
+// One block per free-surface face, one thread per GLL point of the face
+// (NGLL2 threads).  eta is the projection of the stored surface movie
+// wavefield onto the noise direction; the kernel integrates
+// deltat * eta * (displ . normal) at each surface point.
+//
+// fix: uses NGLLX in the INDEX4 macro instead of the hard-coded literal 5,
+// for consistency with the rest of this file (same value, clearer intent).
+__global__ void compute_kernels_strength_noise_cuda_kernel(realw* displ,
+ int* free_surface_ispec,
+ int* free_surface_ijk,
+ int* ibool,
+ realw* noise_surface_movie,
+ realw* normal_x_noise,
+ realw* normal_y_noise,
+ realw* normal_z_noise,
+ realw* Sigma_kl,
+ realw deltat,
+ int num_free_surface_faces) {
+ int iface = blockIdx.x + blockIdx.y*gridDim.x;
+
+ // handles case when there is 1 extra block (due to rectangular grid)
+ if(iface < num_free_surface_faces) {
+
+ int ispec = free_surface_ispec[iface]-1;
+ int igll = threadIdx.x;
+ int ipoin = igll + NGLL2*iface;
+
+ // local (i,j,k) indices of this surface GLL point (Fortran -> C shift)
+ int i = free_surface_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)] - 1;
+ int j = free_surface_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)] - 1;
+ int k = free_surface_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)] - 1;
+
+ int iglob = ibool[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)] - 1;
+
+ // component of the surface movie wavefield along the noise direction
+ realw eta = ( noise_surface_movie[INDEX3(NDIM,NGLL2,0,igll,iface)]*normal_x_noise[ipoin]+
+ noise_surface_movie[INDEX3(NDIM,NGLL2,1,igll,iface)]*normal_y_noise[ipoin]+
+ noise_surface_movie[INDEX3(NDIM,NGLL2,2,igll,iface)]*normal_z_noise[ipoin]);
+
+ // time-integrated product with the normal component of the displacement
+ Sigma_kl[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)] += deltat*eta*(normal_x_noise[ipoin]*displ[3*iglob]+
+ normal_y_noise[ipoin]*displ[1+3*iglob]+
+ normal_z_noise[ipoin]*displ[2+3*iglob]);
+ }
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Fortran-callable wrapper: copies this time step's surface movie wavefield
+// to the device and launches the noise strength kernel accumulation.
+//
+// fix: the cudaMemcpy size now uses NDIM instead of the hard-coded literal 3,
+// matching the INDEX3(NDIM,...) layout the kernel reads back (same value).
+extern "C"
+void FC_FUNC_(compute_kernels_strgth_noise_cu,
+ COMPUTE_KERNELS_STRGTH_NOISE_CU)(long* Mesh_pointer,
+ realw* h_noise_surface_movie,
+ realw* deltat) {
+
+ TRACE("compute_kernels_strgth_noise_cu");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+
+ // copies the NDIM components at each of the NGLL2 points of every face
+ cudaMemcpy(mp->d_noise_surface_movie,h_noise_surface_movie,
+ NDIM*NGLL2*(mp->num_free_surface_faces)*sizeof(realw),cudaMemcpyHostToDevice);
+
+ // one block per free-surface face; 2D grid when more than 65535 faces
+ int num_blocks_x = mp->num_free_surface_faces;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(NGLL2,1,1);
+
+ compute_kernels_strength_noise_cuda_kernel<<<grid,threads>>>(mp->d_displ,
+ mp->d_free_surface_ispec,
+ mp->d_free_surface_ijk,
+ mp->d_ibool,
+ mp->d_noise_surface_movie,
+ mp->d_normal_x_noise,
+ mp->d_normal_y_noise,
+ mp->d_normal_z_noise,
+ mp->d_Sigma_kl,*deltat,
+ mp->num_free_surface_faces);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("compute_kernels_strength_noise_cuda_kernel");
+#endif
+}
+
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// ACOUSTIC SIMULATIONS
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+// Computes the gradient of an acoustic scalar potential at one GLL point of
+// one element, scaled by 1/rho: vector_field_element = grad(scalar_field)/rho.
+//
+// ijk                  : local GLL point index (0..NGLL3-1)
+// ispec                : element index
+// scalar_field         : the element's potential values (NGLL3 entries,
+//                        typically in shared memory)
+// vector_field_element : output, the 3 Cartesian gradient components
+// hprime_*             : GLL derivative matrices along xi/eta/gamma
+// d_xi*/d_eta*/d_gamma*: inverse Jacobian mapping derivatives, stored with
+//                        the padded per-element stride NGLL3_PADDED
+// rhol                 : density at this point
+// gravity              : gravity flag; currently unused, kept for interface
+//                        stability (see note below)
+//
+// fix: the original if(gravity)/else branches were byte-identical, so the
+// dead duplication is collapsed into a single statement.
+__device__ void compute_gradient_kernel(int ijk,
+ int ispec,
+ realw* scalar_field,
+ realw* vector_field_element,
+ realw* hprime_xx,
+ realw* hprime_yy,
+ realw* hprime_zz,
+ realw* d_xix,
+ realw* d_xiy,
+ realw* d_xiz,
+ realw* d_etax,
+ realw* d_etay,
+ realw* d_etaz,
+ realw* d_gammax,
+ realw* d_gammay,
+ realw* d_gammaz,
+ realw rhol,
+ int gravity) {
+
+ realw temp1l,temp2l,temp3l;
+ realw hp1,hp2,hp3;
+ realw xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl;
+ realw rho_invl;
+ int l,offset,offset1,offset2,offset3;
+
+ //const int NGLLX = 5;
+ const int NGLL3_ALIGN = NGLL3_PADDED;
+
+ // local (I,J,K) indices of this GLL point within the element
+ int K = (ijk/NGLL2);
+ int J = ((ijk-K*NGLL2)/NGLLX);
+ int I = (ijk-K*NGLL2-J*NGLLX);
+
+ // derivative along x
+ temp1l = 0.f;
+ for( l=0; l<NGLLX;l++){
+ hp1 = hprime_xx[l*NGLLX+I];
+ offset1 = K*NGLL2+J*NGLLX+l;
+ temp1l += scalar_field[offset1]*hp1;
+ }
+
+ // derivative along y
+ temp2l = 0.f;
+ for( l=0; l<NGLLX;l++){
+ hp2 = hprime_yy[l*NGLLX+J];
+ offset2 = K*NGLL2+l*NGLLX+I;
+ temp2l += scalar_field[offset2]*hp2;
+ }
+
+ // derivative along z
+ temp3l = 0.f;
+ for( l=0; l<NGLLX;l++){
+ hp3 = hprime_zz[l*NGLLX+K];
+ offset3 = l*NGLL2+J*NGLLX+I;
+ temp3l += scalar_field[offset3]*hp3;
+
+ }
+
+ // padded offset of this point in the mapping arrays
+ offset = ispec*NGLL3_ALIGN + ijk;
+
+ xixl = d_xix[offset];
+ xiyl = d_xiy[offset];
+ xizl = d_xiz[offset];
+ etaxl = d_etax[offset];
+ etayl = d_etay[offset];
+ etazl = d_etaz[offset];
+ gammaxl = d_gammax[offset];
+ gammayl = d_gammay[offset];
+ gammazl = d_gammaz[offset];
+
+ // daniel: TODO - check gravity case here; the gravity and no-gravity
+ // branches used the identical expression, so it is computed only once
+ rho_invl = 1.0f / rhol;
+
+ // derivatives of acoustic scalar potential field on GLL points
+ vector_field_element[0] = (temp1l*xixl + temp2l*etaxl + temp3l*gammaxl) * rho_invl;
+ vector_field_element[1] = (temp1l*xiyl + temp2l*etayl + temp3l*gammayl) * rho_invl;
+ vector_field_element[2] = (temp1l*xizl + temp2l*etazl + temp3l*gammazl) * rho_invl;
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+// CUDA kernel accumulating the acoustic adjoint kernels (density rho and
+// bulk modulus kappa) on all acoustic elements.
+// Launch layout: one block per spectral element (possibly folded into a 2D
+// grid), blockDim.x == NGLL3 threads, i.e. one thread per GLL point.
+// NOTE(review): relies on every one of the NGLL3 threads reaching the
+// __syncthreads() below, so blockDim.x must be exactly NGLL3.
+__global__ void compute_kernels_acoustic_kernel(int* ispec_is_acoustic,
+ int* ibool,
+ realw* rhostore,
+ realw* kappastore,
+ realw* hprime_xx,
+ realw* hprime_yy,
+ realw* hprime_zz,
+ realw* d_xix,
+ realw* d_xiy,
+ realw* d_xiz,
+ realw* d_etax,
+ realw* d_etay,
+ realw* d_etaz,
+ realw* d_gammax,
+ realw* d_gammay,
+ realw* d_gammaz,
+ realw* potential_dot_dot_acoustic,
+ realw* b_potential_acoustic,
+ realw* b_potential_dot_dot_acoustic,
+ realw* rho_ac_kl,
+ realw* kappa_ac_kl,
+ realw deltat,
+ int NSPEC_AB,
+ int gravity) {
+
+ // element index reconstructed from the 2D launch grid
+ int ispec = blockIdx.x + blockIdx.y*gridDim.x;
+
+ // handles case when there is 1 extra block (due to rectangular grid)
+ if( ispec < NSPEC_AB ){
+
+ // acoustic elements only
+ if( ispec_is_acoustic[ispec] ) {
+
+ int ijk = threadIdx.x;
+
+ // local and global indices
+ int ijk_ispec = ijk + NGLL3*ispec;
+ int ijk_ispec_padded = ijk + NGLL3_PADDED*ispec;
+ int iglob = ibool[ijk_ispec] - 1;
+
+ realw accel_elm[3];
+ realw b_displ_elm[3];
+ realw rhol,kappal;
+
+ // shared memory between all threads within this block
+ __shared__ realw scalar_field_displ[NGLL3];
+ __shared__ realw scalar_field_accel[NGLL3];
+
+ // copy field values
+ scalar_field_displ[ijk] = b_potential_acoustic[iglob];
+ scalar_field_accel[ijk] = potential_dot_dot_acoustic[iglob];
+ // makes sure both shared arrays are fully populated before
+ // compute_gradient_kernel() reads neighbouring GLL values
+ __syncthreads();
+
+ // gets material parameter
+ rhol = rhostore[ijk_ispec_padded];
+
+ // displacement vector from backward field
+ compute_gradient_kernel(ijk,ispec,scalar_field_displ,b_displ_elm,
+ hprime_xx,hprime_yy,hprime_zz,
+ d_xix,d_xiy,d_xiz,d_etax,d_etay,d_etaz,d_gammax,d_gammay,d_gammaz,
+ rhol,gravity);
+
+ // acceleration vector
+ compute_gradient_kernel(ijk,ispec,scalar_field_accel,accel_elm,
+ hprime_xx,hprime_yy,hprime_zz,
+ d_xix,d_xiy,d_xiz,d_etax,d_etay,d_etaz,d_gammax,d_gammay,d_gammaz,
+ rhol,gravity);
+
+ // density kernel: - deltat * rho * (accel . b_displ), accumulated over time steps
+ rho_ac_kl[ijk_ispec] -= deltat * rhol * (accel_elm[0]*b_displ_elm[0] +
+ accel_elm[1]*b_displ_elm[1] +
+ accel_elm[2]*b_displ_elm[2]);
+
+ // bulk modulus kernel: - deltat / kappa * chi_dot_dot * b_chi_dot_dot
+ kappal = kappastore[ijk_ispec];
+ kappa_ac_kl[ijk_ispec] -= deltat / kappal * potential_dot_dot_acoustic[iglob]
+ * b_potential_dot_dot_acoustic[iglob];
+ }
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+// Host-side launcher, callable from fortran: accumulates the acoustic
+// adjoint kernels (rho, kappa) for one time step.
+// Mesh_pointer : fortran integer container holding the Mesh* address
+// deltat_f     : time step size used as integration weight
+extern "C"
+void FC_FUNC_(compute_kernels_acoustic_cuda,
+              COMPUTE_KERNELS_ACOUSTIC_CUDA)(
+                                             long* Mesh_pointer,
+                                             realw* deltat_f) {
+
+TRACE("compute_kernels_acoustic_cuda");
+
+  // retrieves mesh structure from the fortran-side integer container
+  Mesh* mp = (Mesh*)(*Mesh_pointer);
+
+  realw deltat = *deltat_f;
+
+  // one thread per GLL point of an element
+  int blocksize = NGLL3; // NGLLX*NGLLY*NGLLZ
+
+  // folds the element count into a 2D grid to stay below the
+  // 65535 blocks-per-dimension hardware limit
+  int nb_x = mp->NSPEC_AB;
+  int nb_y = 1;
+  while( nb_x > 65535 ){
+    nb_x = (int) ceil(nb_x*0.5f);
+    nb_y = nb_y*2;
+  }
+
+  dim3 grid(nb_x,nb_y);
+  dim3 threads(blocksize,1,1);
+
+  compute_kernels_acoustic_kernel<<<grid,threads>>>(mp->d_ispec_is_acoustic,
+                                                    mp->d_ibool,
+                                                    mp->d_rhostore,
+                                                    mp->d_kappastore,
+                                                    mp->d_hprime_xx,
+                                                    mp->d_hprime_yy,
+                                                    mp->d_hprime_zz,
+                                                    mp->d_xix,
+                                                    mp->d_xiy,
+                                                    mp->d_xiz,
+                                                    mp->d_etax,
+                                                    mp->d_etay,
+                                                    mp->d_etaz,
+                                                    mp->d_gammax,
+                                                    mp->d_gammay,
+                                                    mp->d_gammaz,
+                                                    mp->d_potential_dot_dot_acoustic,
+                                                    mp->d_b_potential_acoustic,
+                                                    mp->d_b_potential_dot_dot_acoustic,
+                                                    mp->d_rho_ac_kl,
+                                                    mp->d_kappa_ac_kl,
+                                                    deltat,
+                                                    mp->NSPEC_AB,
+                                                    mp->gravity);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  exit_on_cuda_error("compute_kernels_acoustic_kernel");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// preconditioner (approximate Hessian kernel)
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Accumulates the elastic part of the approximate Hessian (preconditioner):
+// hess += deltat * (accel . b_accel) at every GLL point of each elastic
+// element. One block per element, NGLL3 threads per block.
+__global__ void compute_kernels_hess_el_cudakernel(int* ispec_is_elastic,
+                                                   int* ibool,
+                                                   realw* accel,
+                                                   realw* b_accel,
+                                                   realw* hess_kl,
+                                                   realw deltat,
+                                                   int NSPEC_AB) {
+
+  int ispec = blockIdx.x + blockIdx.y*gridDim.x;
+
+  // guards against the extra block of a rectangular launch grid
+  if( ispec >= NSPEC_AB ) return;
+
+  // elastic elements only
+  if( ! ispec_is_elastic[ispec] ) return;
+
+  int tx = threadIdx.x;
+  int ijk_ispec = tx + NGLL3*ispec;
+  // global node index (fortran -> C indexing), times 3 vector components
+  int iglob3 = 3*( ibool[ijk_ispec] - 1 );
+
+  // approximate hessian: dot product of forward and backward accelerations
+  hess_kl[ijk_ispec] += deltat * ( accel[iglob3  ]*b_accel[iglob3  ]
+                                 + accel[iglob3+1]*b_accel[iglob3+1]
+                                 + accel[iglob3+2]*b_accel[iglob3+2] );
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// CUDA kernel accumulating the acoustic part of the approximate Hessian
+// (preconditioner): hess += deltat * (accel . b_accel), where both vectors
+// are obtained as gradients of the acoustic potentials.
+// Launch layout: one block per element, blockDim.x == NGLL3 threads.
+// NOTE(review): requires all NGLL3 threads to reach the __syncthreads()
+// below, so blockDim.x must be exactly NGLL3.
+__global__ void compute_kernels_hess_ac_cudakernel(int* ispec_is_acoustic,
+ int* ibool,
+ realw* potential_dot_dot_acoustic,
+ realw* b_potential_dot_dot_acoustic,
+ realw* rhostore,
+ realw* hprime_xx,
+ realw* hprime_yy,
+ realw* hprime_zz,
+ realw* d_xix,
+ realw* d_xiy,
+ realw* d_xiz,
+ realw* d_etax,
+ realw* d_etay,
+ realw* d_etaz,
+ realw* d_gammax,
+ realw* d_gammay,
+ realw* d_gammaz,
+ realw* hess_kl,
+ realw deltat,
+ int NSPEC_AB,
+ int gravity) {
+
+ // element index reconstructed from the 2D launch grid
+ int ispec = blockIdx.x + blockIdx.y*gridDim.x;
+
+ // handles case when there is 1 extra block (due to rectangular grid)
+ if(ispec < NSPEC_AB) {
+
+ // acoustic elements only
+ if( ispec_is_acoustic[ispec] ){
+
+ // local and global indices
+ int ijk = threadIdx.x;
+ int ijk_ispec = ijk + NGLL3*ispec;
+ int iglob = ibool[ijk_ispec] - 1 ;
+
+ // rhostore uses padded element stride
+ int ijk_ispec_padded = ijk + NGLL3_PADDED*ispec;
+
+ realw accel_elm[3];
+ realw b_accel_elm[3];
+ realw rhol;
+
+ // shared memory between all threads within this block
+ __shared__ realw scalar_field_accel[NGLL3];
+ __shared__ realw scalar_field_b_accel[NGLL3];
+
+ // copy field values
+ scalar_field_accel[ijk] = potential_dot_dot_acoustic[iglob];
+ scalar_field_b_accel[ijk] = b_potential_dot_dot_acoustic[iglob];
+ // all GLL values must be in shared memory before taking gradients
+ __syncthreads();
+
+ // gets material parameter
+ rhol = rhostore[ijk_ispec_padded];
+
+ // acceleration vector
+ compute_gradient_kernel(ijk,ispec,
+ scalar_field_accel,accel_elm,
+ hprime_xx,hprime_yy,hprime_zz,
+ d_xix,d_xiy,d_xiz,d_etax,d_etay,d_etaz,d_gammax,d_gammay,d_gammaz,
+ rhol,gravity);
+
+ // acceleration vector from backward field
+ compute_gradient_kernel(ijk,ispec,
+ scalar_field_b_accel,b_accel_elm,
+ hprime_xx,hprime_yy,hprime_zz,
+ d_xix,d_xiy,d_xiz,d_etax,d_etay,d_etaz,d_gammax,d_gammay,d_gammaz,
+ rhol,gravity);
+ // approximates hessian: dot product of forward and backward accelerations
+ hess_kl[ijk_ispec] += deltat * (accel_elm[0]*b_accel_elm[0] +
+ accel_elm[1]*b_accel_elm[1] +
+ accel_elm[2]*b_accel_elm[2]);
+
+ } // ispec_is_acoustic
+
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Host-side launcher, callable from fortran: accumulates the approximate
+// Hessian (preconditioner) kernels for elastic and/or acoustic domains.
+// Mesh_pointer        : fortran integer container holding the Mesh* address
+// deltat_f            : time step size used as integration weight
+// ELASTIC_SIMULATION  : non-zero if elastic elements are present
+// ACOUSTIC_SIMULATION : non-zero if acoustic elements are present
+extern "C"
+void FC_FUNC_(compute_kernels_hess_cuda,
+              COMPUTE_KERNELS_HESS_CUDA)(long* Mesh_pointer,
+                                         realw* deltat_f,
+                                         int* ELASTIC_SIMULATION,
+                                         int* ACOUSTIC_SIMULATION) {
+  TRACE("compute_kernels_hess_cuda");
+
+  // retrieves mesh structure from the fortran-side integer container
+  Mesh* mp = (Mesh*)(*Mesh_pointer);
+
+  realw deltat = *deltat_f;
+
+  // one thread per GLL point of an element
+  int blocksize = NGLL3; // NGLLX*NGLLY*NGLLZ
+
+  // folds the element count into a 2D grid to stay below the
+  // 65535 blocks-per-dimension hardware limit
+  int nb_x = mp->NSPEC_AB;
+  int nb_y = 1;
+  while( nb_x > 65535 ){
+    nb_x = (int) ceil(nb_x*0.5f);
+    nb_y = nb_y*2;
+  }
+
+  dim3 grid(nb_x,nb_y);
+  dim3 threads(blocksize,1,1);
+
+  // elastic contribution
+  if( *ELASTIC_SIMULATION ) {
+    compute_kernels_hess_el_cudakernel<<<grid,threads>>>(mp->d_ispec_is_elastic,
+                                                         mp->d_ibool,
+                                                         mp->d_accel,
+                                                         mp->d_b_accel,
+                                                         mp->d_hess_el_kl,
+                                                         deltat,
+                                                         mp->NSPEC_AB);
+  }
+
+  // acoustic contribution
+  if( *ACOUSTIC_SIMULATION ) {
+    compute_kernels_hess_ac_cudakernel<<<grid,threads>>>(mp->d_ispec_is_acoustic,
+                                                         mp->d_ibool,
+                                                         mp->d_potential_dot_dot_acoustic,
+                                                         mp->d_b_potential_dot_dot_acoustic,
+                                                         mp->d_rhostore,
+                                                         mp->d_hprime_xx,
+                                                         mp->d_hprime_yy,
+                                                         mp->d_hprime_zz,
+                                                         mp->d_xix,
+                                                         mp->d_xiy,
+                                                         mp->d_xiz,
+                                                         mp->d_etax,
+                                                         mp->d_etay,
+                                                         mp->d_etaz,
+                                                         mp->d_gammax,
+                                                         mp->d_gammay,
+                                                         mp->d_gammaz,
+                                                         mp->d_hess_ac_kl,
+                                                         deltat,
+                                                         mp->NSPEC_AB,
+                                                         mp->gravity);
+  }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  exit_on_cuda_error("compute_kernels_hess_cuda");
+#endif
+}
+
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu 2012-02-14 15:11:07 UTC (rev 19622)
@@ -0,0 +1,192 @@
+/*
+ !=====================================================================
+ !
+ ! S p e c f e m 3 D V e r s i o n 2 . 0
+ ! ---------------------------------------
+ !
+ ! Main authors: Dimitri Komatitsch and Jeroen Tromp
+ ! Princeton University, USA and University of Pau / CNRS / INRIA
+ ! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+ ! April 2011
+ !
+ ! This program is free software; you can redistribute it and/or modify
+ ! it under the terms of the GNU General Public License as published by
+ ! the Free Software Foundation; either version 2 of the License, or
+ ! (at your option) any later version.
+ !
+ ! This program is distributed in the hope that it will be useful,
+ ! but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ ! GNU General Public License for more details.
+ !
+ ! You should have received a copy of the GNU General Public License along
+ ! with this program; if not, write to the Free Software Foundation, Inc.,
+ ! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ !
+ !=====================================================================
+ */
+
+#include <stdio.h>
+#include <cuda.h>
+#include <cublas.h>
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Sommerfeld absorbing (Stacey) boundary condition for the acoustic
+// potential. One block per absorbing boundary face, blockDim.x == NGLL2
+// threads (one per face GLL point).
+// SIMULATION_TYPE 3 reinjects the stored absorbing contribution into the
+// backward field; SIMULATION_TYPE 1 with SAVE_FORWARD stores it instead.
+__global__ void compute_stacey_acoustic_kernel(realw* potential_dot_acoustic,
+                                               realw* potential_dot_dot_acoustic,
+                                               int* abs_boundary_ispec,
+                                               int* abs_boundary_ijk,
+                                               realw* abs_boundary_jacobian2Dw,
+                                               int* ibool,
+                                               realw* rhostore,
+                                               realw* kappastore,
+                                               int* ispec_is_inner,
+                                               int* ispec_is_acoustic,
+                                               int phase_is_inner,
+                                               int SIMULATION_TYPE, int SAVE_FORWARD,
+                                               int num_abs_boundary_faces,
+                                               realw* b_potential_dot_acoustic,
+                                               realw* b_potential_dot_dot_acoustic,
+                                               realw* b_absorb_potential,
+                                               int gravity) {
+
+  int igll = threadIdx.x;
+  int iface = blockIdx.x + gridDim.x*blockIdx.y;
+
+  int i,j,k,iglob,ispec;
+  realw rhol,kappal,cpl;
+  realw jacobianw;
+  realw vel;
+
+  // blocksize is NGLL2 == 25, so no additional thread-index check is needed
+  if( iface < num_abs_boundary_faces){
+
+    // "-1" from index values to convert from Fortran-> C indexing
+    ispec = abs_boundary_ispec[iface]-1;
+
+    if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_acoustic[ispec] ) {
+
+      i = abs_boundary_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)]-1;
+      j = abs_boundary_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)]-1;
+      k = abs_boundary_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)]-1;
+      // uses NGLLX instead of the former hard-coded 5, consistent with the
+      // elastic kernel and the rhostore access below (same value)
+      iglob = ibool[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)]-1;
+
+      // determines bulk sound speed cp = sqrt( kappa / rho )
+      rhol = rhostore[INDEX4_PADDED(NGLLX,NGLLX,NGLLX,i,j,k,ispec)];
+
+      kappal = kappastore[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)];
+
+      cpl = sqrt( kappal / rhol );
+
+      // velocity
+      if( gravity ){
+        // daniel: TODO - check gravity and stacey condition here...
+        // uses a potential definition of: s = grad(chi)
+        vel = potential_dot_acoustic[iglob] / rhol ;
+      }else{
+        // uses a potential definition of: s = 1/rho grad(chi)
+        vel = potential_dot_acoustic[iglob] / rhol;
+      }
+
+      // gets associated, weighted jacobian
+      jacobianw = abs_boundary_jacobian2Dw[INDEX2(NGLL2,igll,iface)];
+
+      // Sommerfeld condition; atomic since neighbouring faces may share
+      // the same global node
+      atomicAdd(&potential_dot_dot_acoustic[iglob],-vel*jacobianw/cpl);
+
+      // adjoint simulations
+      if( SIMULATION_TYPE == 3 ){
+        // Sommerfeld condition: reinjects the stored absorbed contribution
+        atomicAdd(&b_potential_dot_dot_acoustic[iglob],-b_absorb_potential[INDEX2(NGLL2,igll,iface)]);
+      }else if( SIMULATION_TYPE == 1 && SAVE_FORWARD ){
+        // saves boundary values for later backward reconstruction
+        b_absorb_potential[INDEX2(NGLL2,igll,iface)] = vel*jacobianw/cpl;
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Host-side launcher, callable from fortran: applies the acoustic Stacey
+// absorbing boundary condition for the current phase (inner/outer).
+// h_b_absorb_potential is the host-side absorbing buffer; the actual file
+// I/O for it is done on the fortran side.
+extern "C"
+void FC_FUNC_(compute_stacey_acoustic_cuda,
+              COMPUTE_STACEY_ACOUSTIC_CUDA)(
+                                    long* Mesh_pointer_f,
+                                    int* phase_is_innerf,
+                                    int* SIMULATION_TYPEf,
+                                    int* SAVE_FORWARDf,
+                                    realw* h_b_absorb_potential) {
+TRACE("compute_stacey_acoustic_cuda");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+  // checks if anything to do: a launch with a zero-sized grid is an invalid
+  // CUDA configuration, so returns early like compute_stacey_elastic_cuda()
+  if( mp->d_num_abs_boundary_faces == 0 ) return;
+
+  int phase_is_inner = *phase_is_innerf;
+  int SIMULATION_TYPE = *SIMULATION_TYPEf;
+  int SAVE_FORWARD = *SAVE_FORWARDf;
+
+  // way 2: Elapsed time: 4.379034e-03
+  // > NGLLSQUARE==NGLL2==25, no further check inside kernel
+  int blocksize = NGLL2;
+
+  // folds the face count into a 2D grid to stay below the
+  // 65535 blocks-per-dimension hardware limit
+  int num_blocks_x = mp->d_num_abs_boundary_faces;
+  int num_blocks_y = 1;
+  while(num_blocks_x > 65535) {
+    num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+    num_blocks_y = num_blocks_y*2;
+  }
+
+  dim3 grid(num_blocks_x,num_blocks_y);
+  dim3 threads(blocksize,1,1);
+
+  // adjoint simulations: reads in absorbing boundary
+  if (SIMULATION_TYPE == 3 ){
+    // copies array to GPU (the file read is done in fortran)
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_potential,h_b_absorb_potential,
+                                       mp->d_b_reclen_potential,cudaMemcpyHostToDevice),7700);
+  }
+
+  compute_stacey_acoustic_kernel<<<grid,threads>>>(mp->d_potential_dot_acoustic,
+                                                   mp->d_potential_dot_dot_acoustic,
+                                                   mp->d_abs_boundary_ispec,
+                                                   mp->d_abs_boundary_ijk,
+                                                   mp->d_abs_boundary_jacobian2Dw,
+                                                   mp->d_ibool,
+                                                   mp->d_rhostore,
+                                                   mp->d_kappastore,
+                                                   mp->d_ispec_is_inner,
+                                                   mp->d_ispec_is_acoustic,
+                                                   phase_is_inner,
+                                                   SIMULATION_TYPE,SAVE_FORWARD,
+                                                   mp->d_num_abs_boundary_faces,
+                                                   mp->d_b_potential_dot_acoustic,
+                                                   mp->d_b_potential_dot_dot_acoustic,
+                                                   mp->d_b_absorb_potential,
+                                                   mp->gravity);
+
+  // adjoint simulations: stores absorbed wavefield part
+  if (SIMULATION_TYPE == 1 && SAVE_FORWARD ){
+    // copies array to CPU (the file write is done in fortran)
+    print_CUDA_error_if_any(cudaMemcpy(h_b_absorb_potential,mp->d_b_absorb_potential,
+                                       mp->d_b_reclen_potential,cudaMemcpyDeviceToHost),7701);
+  }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  exit_on_cuda_error("compute_stacey_acoustic_kernel");
+#endif
+}
+
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu 2012-02-14 15:11:07 UTC (rev 19622)
@@ -0,0 +1,214 @@
+/*
+ !=====================================================================
+ !
+ ! S p e c f e m 3 D V e r s i o n 2 . 0
+ ! ---------------------------------------
+ !
+ ! Main authors: Dimitri Komatitsch and Jeroen Tromp
+ ! Princeton University, USA and University of Pau / CNRS / INRIA
+ ! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+ ! April 2011
+ !
+ ! This program is free software; you can redistribute it and/or modify
+ ! it under the terms of the GNU General Public License as published by
+ ! the Free Software Foundation; either version 2 of the License, or
+ ! (at your option) any later version.
+ !
+ ! This program is distributed in the hope that it will be useful,
+ ! but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ ! GNU General Public License for more details.
+ !
+ ! You should have received a copy of the GNU General Public License along
+ ! with this program; if not, write to the Free Software Foundation, Inc.,
+ ! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ !
+ !=====================================================================
+ */
+
+#include <stdio.h>
+#include <cuda.h>
+#include <cublas.h>
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Stacey absorbing boundary condition for the elastic wavefield: computes
+// the absorbing traction t = rho*vp*(v.n)n + rho*vs*(v - (v.n)n) on each
+// boundary GLL point and subtracts its weighted contribution from accel.
+// One block per absorbing face, blockDim.x == NGLL2 threads.
+// SIMULATION_TYPE 3 reinjects the stored traction into the backward field;
+// SIMULATION_TYPE 1 with SAVE_FORWARD stores it instead.
+__global__ void compute_stacey_elastic_kernel(realw* veloc,
+ realw* accel,
+ int* abs_boundary_ispec,
+ int* abs_boundary_ijk,
+ realw* abs_boundary_normal,
+ realw* abs_boundary_jacobian2Dw,
+ int* ibool,
+ realw* rho_vp,
+ realw* rho_vs,
+ int* ispec_is_inner,
+ int* ispec_is_elastic,
+ int phase_is_inner,
+ int SIMULATION_TYPE,
+ int SAVE_FORWARD,
+ int num_abs_boundary_faces,
+ realw* b_accel,
+ realw* b_absorb_field) {
+
+ int igll = threadIdx.x; // tx
+ int iface = blockIdx.x + gridDim.x*blockIdx.y; // bx
+
+ int i,j,k,iglob,ispec;
+ realw vx,vy,vz,vn;
+ realw nx,ny,nz;
+ realw rho_vp_temp,rho_vs_temp;
+ realw tx,ty,tz;
+ realw jacobianw;
+
+ // don't compute points outside NGLLSQUARE==NGLL2==25
+ // way 2: no further check needed since blocksize = 25
+ if( iface < num_abs_boundary_faces){
+
+ //if(igll < NGLL2 && iface < num_abs_boundary_faces) {
+
+ // "-1" from index values to convert from Fortran-> C indexing
+ ispec = abs_boundary_ispec[iface]-1;
+
+ if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_elastic[ispec] ) {
+
+ i = abs_boundary_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)]-1;
+ j = abs_boundary_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)]-1;
+ k = abs_boundary_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)]-1;
+ iglob = ibool[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)]-1;
+
+ // gets associated velocity
+
+ vx = veloc[iglob*3+0];
+ vy = veloc[iglob*3+1];
+ vz = veloc[iglob*3+2];
+
+ // gets associated normal
+ nx = abs_boundary_normal[INDEX3(NDIM,NGLL2,0,igll,iface)];
+ ny = abs_boundary_normal[INDEX3(NDIM,NGLL2,1,igll,iface)];
+ nz = abs_boundary_normal[INDEX3(NDIM,NGLL2,2,igll,iface)];
+
+ // // velocity component in normal direction (normal points out of element)
+ vn = vx*nx + vy*ny + vz*nz;
+
+ rho_vp_temp = rho_vp[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)];
+ rho_vs_temp = rho_vs[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)];
+
+ // absorbing traction: P-wave impedance on the normal component,
+ // S-wave impedance on the tangential components
+ tx = rho_vp_temp*vn*nx + rho_vs_temp*(vx-vn*nx);
+ ty = rho_vp_temp*vn*ny + rho_vs_temp*(vy-vn*ny);
+ tz = rho_vp_temp*vn*nz + rho_vs_temp*(vz-vn*nz);
+
+ jacobianw = abs_boundary_jacobian2Dw[INDEX2(NGLL2,igll,iface)];
+
+ // atomic updates since different faces may share the same global node
+ atomicAdd(&accel[iglob*3],-tx*jacobianw);
+ atomicAdd(&accel[iglob*3+1],-ty*jacobianw);
+ atomicAdd(&accel[iglob*3+2],-tz*jacobianw);
+
+ if(SIMULATION_TYPE == 3) {
+ // adjoint simulations: reinjects the stored absorbed contribution
+ atomicAdd(&b_accel[iglob*3 ],-b_absorb_field[INDEX3(NDIM,NGLL2,0,igll,iface)]);
+ atomicAdd(&b_accel[iglob*3+1],-b_absorb_field[INDEX3(NDIM,NGLL2,1,igll,iface)]);
+ atomicAdd(&b_accel[iglob*3+2],-b_absorb_field[INDEX3(NDIM,NGLL2,2,igll,iface)]);
+ }
+ else if(SAVE_FORWARD && SIMULATION_TYPE == 1) {
+ // saves boundary tractions for later backward reconstruction
+ b_absorb_field[INDEX3(NDIM,NGLL2,0,igll,iface)] = tx*jacobianw;
+ b_absorb_field[INDEX3(NDIM,NGLL2,1,igll,iface)] = ty*jacobianw;
+ b_absorb_field[INDEX3(NDIM,NGLL2,2,igll,iface)] = tz*jacobianw;
+ } // SIMULATION_TYPE
+ }
+ } // num_abs_boundary_faces
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+// Host-side launcher, callable from fortran: applies the elastic Stacey
+// absorbing boundary condition for the current phase (inner/outer).
+// h_b_absorb_field is the host-side absorbing buffer; the actual file I/O
+// for it is done on the fortran side.
+extern "C"
+void FC_FUNC_(compute_stacey_elastic_cuda,
+              COMPUTE_STACEY_ELASTIC_CUDA)(long* Mesh_pointer_f,
+                                           int* phase_is_innerf,
+                                           int* SIMULATION_TYPEf,
+                                           int* SAVE_FORWARDf,
+                                           realw* h_b_absorb_field) {
+
+TRACE("compute_stacey_elastic_cuda");
+
+  // retrieves mesh structure from the fortran-side integer container
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+
+  // nothing to do if this partition has no absorbing boundary faces
+  if( mp->d_num_abs_boundary_faces == 0 ) return;
+
+  int phase_is_inner = *phase_is_innerf;
+  int SIMULATION_TYPE = *SIMULATION_TYPEf;
+  int SAVE_FORWARD = *SAVE_FORWARDf;
+
+  // one thread per face GLL point (NGLLSQUARE==NGLL2==25), which seems
+  // slightly faster than padding the block up to 32 threads
+  int blocksize = NGLL2;
+
+  // folds the face count into a 2D grid to stay below the
+  // 65535 blocks-per-dimension hardware limit
+  int nb_x = mp->d_num_abs_boundary_faces;
+  int nb_y = 1;
+  while( nb_x > 65535 ){
+    nb_x = (int) ceil(nb_x*0.5f);
+    nb_y = nb_y*2;
+  }
+
+  dim3 grid(nb_x,nb_y);
+  dim3 threads(blocksize,1,1);
+
+  // adjoint simulations: uploads the absorbed wavefield previously read
+  // from file on the fortran side
+  if(SIMULATION_TYPE == 3 && mp->d_num_abs_boundary_faces > 0) {
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_field,h_b_absorb_field,
+                                       mp->d_b_reclen_field,cudaMemcpyHostToDevice),7700);
+  }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  exit_on_cuda_error("between cudamemcpy and compute_stacey_elastic_kernel");
+#endif
+
+  compute_stacey_elastic_kernel<<<grid,threads>>>(mp->d_veloc,
+                                                  mp->d_accel,
+                                                  mp->d_abs_boundary_ispec,
+                                                  mp->d_abs_boundary_ijk,
+                                                  mp->d_abs_boundary_normal,
+                                                  mp->d_abs_boundary_jacobian2Dw,
+                                                  mp->d_ibool,
+                                                  mp->d_rho_vp,
+                                                  mp->d_rho_vs,
+                                                  mp->d_ispec_is_inner,
+                                                  mp->d_ispec_is_elastic,
+                                                  phase_is_inner,
+                                                  SIMULATION_TYPE,SAVE_FORWARD,
+                                                  mp->d_num_abs_boundary_faces,
+                                                  mp->d_b_accel,
+                                                  mp->d_b_absorb_field);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  exit_on_cuda_error("compute_stacey_elastic_kernel");
+#endif
+
+  // adjoint simulations: downloads the absorbed wavefield part;
+  // the write to file is done afterwards on the fortran side
+  if(SIMULATION_TYPE == 1 && SAVE_FORWARD && mp->d_num_abs_boundary_faces > 0 ) {
+    print_CUDA_error_if_any(cudaMemcpy(h_b_absorb_field,mp->d_b_absorb_field,
+                                       mp->d_b_reclen_field,cudaMemcpyDeviceToHost),7701);
+  }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  exit_on_cuda_error("after compute_stacey_elastic after cudamemcpy");
+#endif
+}
+
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu 2012-02-14 15:11:07 UTC (rev 19622)
@@ -0,0 +1,506 @@
+/*
+ !=====================================================================
+ !
+ ! S p e c f e m 3 D V e r s i o n 2 . 0
+ ! ---------------------------------------
+ !
+ ! Main authors: Dimitri Komatitsch and Jeroen Tromp
+ ! Princeton University, USA and University of Pau / CNRS / INRIA
+ ! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+ ! April 2011
+ !
+ ! This program is free software; you can redistribute it and/or modify
+ ! it under the terms of the GNU General Public License as published by
+ ! the Free Software Foundation; either version 2 of the License, or
+ ! (at your option) any later version.
+ !
+ ! This program is distributed in the hope that it will be useful,
+ ! but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ ! GNU General Public License for more details.
+ !
+ ! You should have received a copy of the GNU General Public License along
+ ! with this program; if not, write to the Free Software Foundation, Inc.,
+ ! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ !
+ !=====================================================================
+ */
+
+#include <stdio.h>
+#include <cuda.h>
+#include <cublas.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+
+
+#define CUBLAS_ERROR(s,n) if (s != CUBLAS_STATUS_SUCCESS) { \
+fprintf (stderr, "CUBLAS Memory Write Error @ %d\n",n); \
+exit(EXIT_FAILURE); }
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// elastic wavefield
+
+// KERNEL 1
+/* ----------------------------------------------------------------------------------------------- */
+
+
+// Newmark predictor step for the elastic wavefield, one thread per array
+// entry:
+//   displ += dt*veloc + dt^2/2*accel ; veloc += dt/2*accel ; accel = 0
+__global__ void UpdateDispVeloc_kernel(realw* displ,
+                                       realw* veloc,
+                                       realw* accel,
+                                       int size,
+                                       realw deltat,
+                                       realw deltatsqover2,
+                                       realw deltatover2) {
+
+  int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
+
+  // grid is padded up to the block size; ignore the overhang
+  if( id < size ){
+    displ[id] += deltat*veloc[id] + deltatsqover2*accel[id];
+    veloc[id] += deltatover2*accel[id];
+    // resets acceleration for the upcoming force assembly
+    accel[id] = 0;
+  }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Host-side launcher, callable from fortran: Newmark predictor update of
+// the elastic displacement/velocity fields and reset of the acceleration
+// (KERNEL 1). For adjoint simulations (SIMULATION_TYPE == 3) the backward
+// fields are updated with their own time step constants.
+extern "C"
+void FC_FUNC_(it_update_displacement_cuda,
+              IT_UPDATE_DISPLACEMENT_CUDA)(long* Mesh_pointer_f,
+                                           int* size_F,
+                                           realw* deltat_F,
+                                           realw* deltatsqover2_F,
+                                           realw* deltatover2_F,
+                                           int* SIMULATION_TYPE,
+                                           realw* b_deltat_F,
+                                           realw* b_deltatsqover2_F,
+                                           realw* b_deltatover2_F) {
+
+TRACE("it_update_displacement_cuda");
+
+  // note: the upper-case name handed to FC_FUNC_ was previously misspelled
+  //       IT_UPDATE_DISPLACMENT_CUDA; with upper-case fortran name mangling
+  //       that symbol would not match the fortran caller and fail to link
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
+
+  int size = *size_F;
+  realw deltat = *deltat_F;
+  realw deltatsqover2 = *deltatsqover2_F;
+  realw deltatover2 = *deltatover2_F;
+  // backward/reconstructed field time step constants (adjoint simulations)
+  realw b_deltat = *b_deltat_F;
+  realw b_deltatsqover2 = *b_deltatsqover2_F;
+  realw b_deltatover2 = *b_deltatover2_F;
+
+  int blocksize = BLOCKSIZE_KERNEL1;
+  // pads the array size up to a multiple of the block size
+  int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
+
+  // folds the block count into a 2D grid to stay below the
+  // 65535 blocks-per-dimension hardware limit
+  int num_blocks_x = size_padded/blocksize;
+  int num_blocks_y = 1;
+  while(num_blocks_x > 65535) {
+    num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+    num_blocks_y = num_blocks_y*2;
+  }
+
+  dim3 grid(num_blocks_x,num_blocks_y);
+  dim3 threads(blocksize,1,1);
+
+  // launch kernel: updates displacement and velocity, resets acceleration
+  UpdateDispVeloc_kernel<<<grid,threads>>>(mp->d_displ,mp->d_veloc,mp->d_accel,
+                                           size,deltat,deltatsqover2,deltatover2);
+
+  // kernel for backward fields
+  if(*SIMULATION_TYPE == 3) {
+    UpdateDispVeloc_kernel<<<grid,threads>>>(mp->d_b_displ,mp->d_b_veloc,mp->d_b_accel,
+                                             size,b_deltat,b_deltatsqover2,b_deltatover2);
+  }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  exit_on_cuda_error("it_update_displacement_cuda");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// acoustic wavefield
+
+// KERNEL 1
+/* ----------------------------------------------------------------------------------------------- */
+
+// Newmark predictor step for the acoustic potentials, one thread per
+// array entry:
+//   chi += dt*chi_dot + dt^2/2*chi_dot_dot ; chi_dot += dt/2*chi_dot_dot ;
+//   chi_dot_dot = 0
+__global__ void UpdatePotential_kernel(realw* potential_acoustic,
+                                       realw* potential_dot_acoustic,
+                                       realw* potential_dot_dot_acoustic,
+                                       int size,
+                                       realw deltat,
+                                       realw deltatsqover2,
+                                       realw deltatover2) {
+
+  int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
+
+  // grid is padded up to the block size; ignore the overhang
+  if( id < size ){
+    potential_acoustic[id] += deltat*potential_dot_acoustic[id]
+                            + deltatsqover2*potential_dot_dot_acoustic[id];
+
+    potential_dot_acoustic[id] += deltatover2*potential_dot_dot_acoustic[id];
+
+    // resets the second time derivative for the upcoming assembly
+    potential_dot_dot_acoustic[id] = 0;
+  }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Host-side launcher, callable from fortran: Newmark predictor update of
+// the acoustic potentials (KERNEL 1). For adjoint simulations
+// (SIMULATION_TYPE == 3) the backward potentials are updated as well.
+extern "C"
+void FC_FUNC_(it_update_displacement_ac_cuda,
+              IT_UPDATE_DISPLACEMENT_AC_CUDA)(long* Mesh_pointer_f,
+                                              int* size_F,
+                                              realw* deltat_F,
+                                              realw* deltatsqover2_F,
+                                              realw* deltatover2_F,
+                                              int* SIMULATION_TYPE,
+                                              realw* b_deltat_F,
+                                              realw* b_deltatsqover2_F,
+                                              realw* b_deltatover2_F) {
+TRACE("it_update_displacement_ac_cuda");
+
+  // note: the second FC_FUNC_ argument must be the upper-case mangled name
+  //       (it was previously passed in lower case, which breaks linking on
+  //       fortran compilers that use upper-case name mangling)
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
+
+  int size = *size_F;
+  realw deltat = *deltat_F;
+  realw deltatsqover2 = *deltatsqover2_F;
+  realw deltatover2 = *deltatover2_F;
+  // backward/reconstructed field time step constants (adjoint simulations)
+  realw b_deltat = *b_deltat_F;
+  realw b_deltatsqover2 = *b_deltatsqover2_F;
+  realw b_deltatover2 = *b_deltatover2_F;
+
+  int blocksize = BLOCKSIZE_KERNEL1;
+  // pads the array size up to a multiple of the block size
+  int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
+
+  // folds the block count into a 2D grid to stay below the
+  // 65535 blocks-per-dimension hardware limit
+  int num_blocks_x = size_padded/blocksize;
+  int num_blocks_y = 1;
+  while(num_blocks_x > 65535) {
+    num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+    num_blocks_y = num_blocks_y*2;
+  }
+
+  dim3 grid(num_blocks_x,num_blocks_y);
+  dim3 threads(blocksize,1,1);
+
+  // launch kernel: updates potentials, resets second derivative
+  UpdatePotential_kernel<<<grid,threads>>>(mp->d_potential_acoustic,
+                                           mp->d_potential_dot_acoustic,
+                                           mp->d_potential_dot_dot_acoustic,
+                                           size,deltat,deltatsqover2,deltatover2);
+
+  // kernel for backward fields
+  if(*SIMULATION_TYPE == 3) {
+    UpdatePotential_kernel<<<grid,threads>>>(mp->d_b_potential_acoustic,
+                                             mp->d_b_potential_dot_acoustic,
+                                             mp->d_b_potential_dot_dot_acoustic,
+                                             size,b_deltat,b_deltatsqover2,b_deltatover2);
+  }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  exit_on_cuda_error("it_update_displacement_ac_cuda");
+#endif
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// KERNEL 3
+//
+// crust/mantle and inner core regions
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// KERNEL 3: multiplies the assembled acceleration by the (inverse) mass
+// matrix value rmass and performs the Newmark corrector update of the
+// velocity, component-wise on 3-vectors. One thread per global node.
+__global__ void kernel_3_cuda_device(realw* veloc,
+                                     realw* accel, int size,
+                                     realw deltatover2,
+                                     realw* rmass) {
+
+  int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
+
+  // grid is padded up to the block size; ignore the overhang
+  if( id < size ){
+    // same mass matrix value applies to all three components
+    realw rm = rmass[id];
+
+    accel[3*id  ] *= rm;
+    accel[3*id+1] *= rm;
+    accel[3*id+2] *= rm;
+
+    veloc[3*id  ] += deltatover2*accel[3*id  ];
+    veloc[3*id+1] += deltatover2*accel[3*id+1];
+    veloc[3*id+2] += deltatover2*accel[3*id+2];
+  }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// KERNEL 3 variant: multiplies the assembled acceleration by the (inverse)
+// mass matrix value only; used when the velocity update must be deferred
+// (see the *OCEANS branch in kernel_3_a_cuda). One thread per global node.
+__global__ void kernel_3_accel_cuda_device(realw* accel,
+                                           int size,
+                                           realw* rmass) {
+
+  int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
+
+  // grid is padded up to the block size; ignore the overhang
+  if( id < size ){
+    // same mass matrix value applies to all three components
+    realw rm = rmass[id];
+
+    accel[3*id  ] *= rm;
+    accel[3*id+1] *= rm;
+    accel[3*id+2] *= rm;
+  }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// KERNEL 3 variant: Newmark corrector update of the velocity only,
+// veloc += dt/2 * accel, component-wise on 3-vectors.
+// One thread per global node.
+__global__ void kernel_3_veloc_cuda_device(realw* veloc,
+                                           realw* accel,
+                                           int size,
+                                           realw deltatover2) {
+
+  int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
+
+  // grid is padded up to the block size; ignore the overhang
+  if( id < size ){
+    veloc[3*id  ] += deltatover2*accel[3*id  ];
+    veloc[3*id+1] += deltatover2*accel[3*id+1];
+    veloc[3*id+2] += deltatover2*accel[3*id+2];
+  }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Fortran-callable corrector update, part a (KERNEL 3): crust/mantle region.
+// Divides accel by the mass matrix (rmass) and, when no ocean load has to be
+// applied afterwards (OCEANS == 0), also applies the Newmark velocity
+// corrector. With oceans, only accel is updated here; the crust/mantle
+// velocity update is completed later by kernel_3_b_cuda.
+extern "C"
+void FC_FUNC_(kernel_3_a_cuda,
+ KERNEL_3_A_CUDA)(long* Mesh_pointer,
+ realw* deltatover2_F,
+ int* SIMULATION_TYPE_f,
+ realw* b_deltatover2_F,
+ int* OCEANS) {
+ TRACE("kernel_3_a_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper
+
+ int SIMULATION_TYPE = *SIMULATION_TYPE_f;
+ realw deltatover2 = *deltatover2_F;
+ realw b_deltatover2 = *b_deltatover2_F;
+
+ int blocksize = BLOCKSIZE_KERNEL3;
+ // pad the number of global points up to a multiple of the block size
+ int size_padded = ((int)ceil(((double)mp->NGLOB_CRUST_MANTLE)/((double)blocksize)))*blocksize;
+
+ int num_blocks_x = size_padded/blocksize;
+ int num_blocks_y = 1;
+ // fold excess blocks into the grid y-dimension (max 65535 blocks per dimension)
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ // crust/mantle region only
+ // check whether we can update accel and veloc, or only accel at this point
+ if( *OCEANS == 0 ){
+ // updates both, accel and veloc
+ kernel_3_cuda_device<<< grid, threads>>>(mp->d_veloc_crust_mantle,
+ mp->d_accel_crust_mantle,
+ mp->NGLOB_CRUST_MANTLE,
+ deltatover2, mp->d_rmass_crust_mantle);
+
+ // backward/reconstructed wavefield (kernel simulations)
+ if(SIMULATION_TYPE == 3) {
+ kernel_3_cuda_device<<< grid, threads>>>(mp->d_b_veloc_crust_mantle,
+ mp->d_b_accel_crust_mantle,
+ mp->NGLOB_CRUST_MANTLE,
+ b_deltatover2,mp->d_rmass_crust_mantle);
+ }
+ }else{
+ // updates only accel
+ kernel_3_accel_cuda_device<<< grid, threads>>>(mp->d_accel_crust_mantle,
+ mp->NGLOB_CRUST_MANTLE,
+ mp->d_rmass_crust_mantle);
+
+ // backward/reconstructed wavefield (kernel simulations)
+ if(SIMULATION_TYPE == 3) {
+ kernel_3_accel_cuda_device<<< grid, threads>>>(mp->d_b_accel_crust_mantle,
+ mp->NGLOB_CRUST_MANTLE,
+ mp->d_rmass_crust_mantle);
+ }
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
+ exit_on_cuda_error("after kernel_3_a");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Fortran-callable corrector update, part b (KERNEL 3).
+// With ocean loads (OCEANS != 0) the crust/mantle velocity corrector was
+// postponed in kernel_3_a_cuda and is applied here first; the inner core
+// region is then always updated (both accel and veloc).
+extern "C"
+void FC_FUNC_(kernel_3_b_cuda,
+ KERNEL_3_B_CUDA)(long* Mesh_pointer,
+ realw* deltatover2_F,
+ int* SIMULATION_TYPE_f,
+ realw* b_deltatover2_F,
+ int* OCEANS) {
+ TRACE("kernel_3_b_cuda");
+ int size_padded,num_blocks_x,num_blocks_y;
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper
+
+ int SIMULATION_TYPE = *SIMULATION_TYPE_f;
+ realw deltatover2 = *deltatover2_F;
+ realw b_deltatover2 = *b_deltatover2_F;
+
+ int blocksize = BLOCKSIZE_KERNEL3;
+
+ // crust/mantle region
+ // in case of ocean loads, we still have to update the velocity for crust/mantle region
+ if( *OCEANS ){
+ // pad the number of global points up to a multiple of the block size
+ size_padded = ((int)ceil(((double)mp->NGLOB_CRUST_MANTLE)/((double)blocksize)))*blocksize;
+ num_blocks_x = size_padded/blocksize;
+ num_blocks_y = 1;
+ // fold excess blocks into the grid y-dimension (max 65535 blocks per dimension)
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ // updates only veloc at this point
+ kernel_3_veloc_cuda_device<<< grid, threads>>>(mp->d_veloc_crust_mantle,
+ mp->d_accel_crust_mantle,
+ mp->NGLOB_CRUST_MANTLE,
+ deltatover2);
+
+ // backward/reconstructed wavefield (kernel simulations)
+ if(SIMULATION_TYPE == 3) {
+ kernel_3_veloc_cuda_device<<< grid, threads>>>(mp->d_b_veloc_crust_mantle,
+ mp->d_b_accel_crust_mantle,
+ mp->NGLOB_CRUST_MANTLE,
+ b_deltatover2);
+ }
+ }
+
+ // inner core
+ // launch configuration re-computed for the inner core point count
+ size_padded = ((int)ceil(((double)mp->NGLOB_INNER_CORE)/((double)blocksize)))*blocksize;
+ num_blocks_x = size_padded/blocksize;
+ num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ // updates both, accel and veloc
+ kernel_3_cuda_device<<< grid, threads>>>(mp->d_veloc_inner_core,
+ mp->d_accel_inner_core,
+ mp->NGLOB_INNER_CORE,
+ deltatover2, mp->d_rmass_inner_core);
+
+ // backward/reconstructed wavefield (kernel simulations)
+ if(SIMULATION_TYPE == 3) {
+ kernel_3_cuda_device<<< grid, threads>>>(mp->d_b_veloc_inner_core,
+ mp->d_b_accel_inner_core,
+ mp->NGLOB_INNER_CORE,
+ b_deltatover2,mp->d_rmass_inner_core);
+ }
+
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
+ exit_on_cuda_error("after kernel_3_b");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// KERNEL 3
+//
+// for outer_core
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+// Newmark corrector step for the (acoustic/fluid) outer core: the wavefield
+// is a scalar potential here, so a single component is updated per point.
+__global__ void kernel_3_outer_core_cuda_device(realw* veloc,
+                                                realw* accel,int size,
+                                                realw deltatover2,
+                                                realw* rmass) {
+  // global 1D thread index over a 2D grid (grid folded in y by the host
+  // wrapper to respect the 65535 blocks-per-dimension limit)
+  int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
+
+  // the grid is padded up to a multiple of the block size,
+  // so trailing threads fall outside the array and do nothing
+  if(id >= size) return;
+
+  // multiplies pressure with the inverse of the mass matrix
+  realw a = accel[id]*rmass[id];
+  accel[id] = a;
+
+  // Newmark time scheme: corrector term
+  veloc[id] += deltatover2*a;
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Fortran-callable corrector update (KERNEL 3) for the fluid outer core:
+// divides the scalar "accel" field by the mass matrix and applies the
+// Newmark velocity corrector; repeats for the backward/reconstructed
+// wavefield when running a kernel simulation (SIMULATION_TYPE == 3).
+extern "C"
+void FC_FUNC_(kernel_3_outer_core_cuda,
+ KERNEL_3_OUTER_CORE_CUDA)(long* Mesh_pointer,
+ realw* deltatover2_F,
+ int* SIMULATION_TYPE_f,
+ realw* b_deltatover2_F) {
+
+ TRACE("kernel_3_outer_core_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper
+
+ int SIMULATION_TYPE = *SIMULATION_TYPE_f;
+ realw deltatover2 = *deltatover2_F;
+ realw b_deltatover2 = *b_deltatover2_F;
+
+ int blocksize = BLOCKSIZE_KERNEL3;
+ // pad the number of global points up to a multiple of the block size
+ int size_padded = ((int)ceil(((double)mp->NGLOB_OUTER_CORE)/((double)blocksize)))*blocksize;
+ int num_blocks_x = size_padded/blocksize;
+ int num_blocks_y = 1;
+ // fold excess blocks into the grid y-dimension (max 65535 blocks per dimension)
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ kernel_3_outer_core_cuda_device<<< grid, threads>>>(mp->d_veloc_outer_core,
+ mp->d_accel_outer_core,
+ mp->NGLOB_OUTER_CORE,
+ deltatover2,mp->d_rmass_outer_core);
+
+ // backward/reconstructed wavefield (kernel simulations)
+ if(SIMULATION_TYPE == 3) {
+ kernel_3_outer_core_cuda_device<<< grid, threads>>>(mp->d_b_veloc_outer_core,
+ mp->d_b_accel_outer_core,
+ mp->NGLOB_OUTER_CORE,
+ b_deltatover2,mp->d_rmass_outer_core);
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
+ exit_on_cuda_error("after kernel_3_outer_core");
+#endif
+}
+
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h 2012-02-14 15:11:07 UTC (rev 19622)
@@ -0,0 +1,706 @@
+/*
+ !=====================================================================
+ !
+ ! S p e c f e m 3 D V e r s i o n 2 . 0
+ ! ---------------------------------------
+ !
+ ! Main authors: Dimitri Komatitsch and Jeroen Tromp
+ ! Princeton University, USA and University of Pau / CNRS / INRIA
+ ! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+ ! April 2011
+ !
+ ! This program is free software; you can redistribute it and/or modify
+ ! it under the terms of the GNU General Public License as published by
+ ! the Free Software Foundation; either version 2 of the License, or
+ ! (at your option) any later version.
+ !
+ ! This program is distributed in the hope that it will be useful,
+ ! but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ ! GNU General Public License for more details.
+ !
+ ! You should have received a copy of the GNU General Public License along
+ ! with this program; if not, write to the Free Software Foundation, Inc.,
+ ! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ !
+ !=====================================================================
+ */
+
+/* trivia
+
+- for most working arrays we use now "realw" instead of "float" type declarations to make it easier to switch
+ between a real or double precision simulation
+ (matching CUSTOM_REAL == 4 or 8 in fortran routines).
+
+- instead of boolean "logical" declared in fortran routines, in C (or Cuda-C) we have to use "int" variables.
+ ifort / gfortran caveat:
+ to check whether it is true or false, do not check for == 1 to test for true values since ifort just uses
+ non-zero values for true (e.g. can be -1 for true). however, false will be always == 0.
+ thus, rather use: if( var ) {...} to test for true instead of if( var == 1 ){...} (alternatively, one could use if( var != 0 ){...}).
+
+*/
+
+#ifndef GPU_MESH_
+#define GPU_MESH_
+
+#include <sys/types.h>
+#include <unistd.h>
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// for debugging and benchmarking
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// set DEBUG to 1 to make every GPU routine print its name on entry via TRACE()
+#define DEBUG 0
+#if DEBUG == 1
+#define TRACE(x) printf("%s\n",x);
+#else
+#define TRACE(x) // printf("%s\n",x);
+#endif
+
+// set MAXDEBUG to 1 for verbose value dumps (requires an int print_count in scope)
+#define MAXDEBUG 0
+#if MAXDEBUG == 1
+#define LOG(x) printf("%s\n",x)
+#define PRINT5(var,offset) for(;print_count<5;print_count++) printf("var(%d)=%2.20f\n",print_count,var[offset+print_count]);
+#define PRINT10(var) if(print_count<10) { printf("var=%1.20e\n",var); print_count++; }
+#define PRINT10i(var) if(print_count<10) { printf("var=%d\n",var); print_count++; }
+#else
+#define LOG(x) // printf("%s\n",x);
+#define PRINT5(var,offset) // for(i=0;i<10;i++) printf("var(%d)=%f\n",i,var[offset+i]);
+// bugfix: PRINT10/PRINT10i were previously only defined in the MAXDEBUG==1
+// branch, so any use of them failed to compile in a normal (MAXDEBUG==0) build
+#define PRINT10(var)
+#define PRINT10i(var)
+#endif
+
+// error checking after cuda function calls
+// NOTE(review): defined unconditionally despite the "VERY_SLOW" name; it guards
+// the exit_on_cuda_error() calls after kernel launches — confirm it is meant
+// to stay enabled in production builds
+#define ENABLE_VERY_SLOW_ERROR_CHECKING
+
+// maximum function
+#define MAX(x,y) (((x) < (y)) ? (y) : (x))
+
+// utility functions: defined in check_fields_cuda.cu
+double get_time();
+void get_free_memory(double* free_db, double* used_db, double* total_db);
+void print_CUDA_error_if_any(cudaError_t err, int num);
+void pause_for_debugger(int pause);
+void exit_on_cuda_error(char* kernel_name);
+void exit_on_error(char* info);
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// cuda constant arrays
+
+/* ----------------------------------------------------------------------------------------------- */
+// (must match constants.h definitions)
+
+// dimensions
+#define NDIM 3
+
+// Gauss-Lobatto-Legendre
+// NGLL2 = NGLLX^2 and NGLL3 = NGLLX^3; keep all three in sync if NGLLX changes
+#define NGLLX 5
+#define NGLL2 25
+#define NGLL3 125 // no padding: requires same size as in fortran for NGLLX * NGLLY * NGLLZ
+
+// padding: 128 == 2**7 might improve on older graphics cards w/ coalescent memory accesses:
+#define NGLL3_PADDED 128
+// no padding: 125 == 5*5*5 to avoid allocation of extra memory
+//#define NGLL3_PADDED 125
+
+// number of standard linear solids
+#define N_SLS 3
+
+// region ids
+// NOTE(review): IREGION_OUTER_CORE (presumably 2) is not defined here — confirm
+// it is not needed on the GPU side
+#define IREGION_CRUST_MANTLE 1
+#define IREGION_INNER_CORE 3
+
+/* ----------------------------------------------------------------------------------------------- */
+
+//typedef float real; // type of variables passed into function
+// must match CUSTOM_REAL in the fortran routines (4 == float, 8 == double);
+// see the "trivia" note at the top of this header
+typedef float realw; // type of "working" variables
+
+// double precision temporary variables leads to 10% performance
+// decrease in Kernel_2_impl (not very much..)
+typedef float reald;
+
+// (optional) pre-processing directive used in kernels: if defined check that it is also set in src/shared/constants.h:
+// leads up to ~ 5% performance increase
+//#define USE_MESH_COLORING_GPU
+
+// (optional) unrolling loops
+// leads up to ~1% performance increase
+//#define MANUALLY_UNROLLED_LOOPS
+
+// cuda kernel block size for updating displacements/potential (newmark time scheme)
+// current hardware: 128 is slightly faster than 256 ( ~ 4%)
+#define BLOCKSIZE_KERNEL1 128
+#define BLOCKSIZE_KERNEL3 128
+#define BLOCKSIZE_TRANSFER 256
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// indexing
+
+// flattened array indexing, Fortran-style ordering (first index varies fastest).
+// bugfix: arguments and results are now fully parenthesized so the macros
+// expand safely inside larger expressions, e.g. INDEX2(n,i,j+1) or
+// 2*INDEX2(n,i,j) — the unparenthesized forms expanded incorrectly there.
+#define INDEX2(xsize,x,y) ((x) + (y)*(xsize))
+
+#define INDEX3(xsize,ysize,x,y,z) ((x) + (xsize)*((y) + (ysize)*(z)))
+//#define INDEX3(xsize,ysize,x,y,z) x + (y)*xsize + (z)*xsize*ysize
+
+#define INDEX4(xsize,ysize,zsize,x,y,z,i) ((x) + (xsize)*((y) + (ysize)*((z) + (zsize)*(i))))
+//#define INDEX4(xsize,ysize,zsize,x,y,z,i) x + (y)*xsize + (z)*xsize*ysize + (i)*xsize*ysize*zsize
+
+#define INDEX5(xsize,ysize,zsize,isize,x,y,z,i,j) ((x) + (xsize)*((y) + (ysize)*((z) + (zsize)*((i) + (isize)*(j)))))
+//#define INDEX5(xsize,ysize,zsize,isize,x,y,z,i,j) x + (y)*xsize + (z)*xsize*ysize + (i)*xsize*ysize*zsize + (j)*xsize*ysize*zsize*isize
+
+#define INDEX6(xsize,ysize,zsize,isize,jsize,x,y,z,i,j,k) ((x) + (xsize)*((y) + (ysize)*((z) + (zsize)*((i) + (isize)*((j) + (jsize)*(k))))))
+
+// last dimension strides over padded elements of NGLL3_PADDED entries each
+#define INDEX4_PADDED(xsize,ysize,zsize,x,y,z,i) ((x) + (xsize)*((y) + (ysize)*(z)) + (i)*NGLL3_PADDED)
+//#define INDEX4_PADDED(xsize,ysize,zsize,x,y,z,i) x + (y)*xsize + (z)*xsize*ysize + (i)*NGLL3_PADDED
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// mesh pointer wrapper structure
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Container for all GPU-resident simulation data of one MPI process.
+// A pointer to this struct is stored in a Fortran integer ("Mesh_pointer"
+// argument of the C wrapper routines) and cast back on entry.
+// Naming convention: d_* members are passed to CUDA kernels and hold device
+// (GPU) addresses; h_* members appear to be host-side arrays
+// (NOTE(review): confirm each h_* array against the allocation code).
+typedef struct mesh_ {
+
+ // mesh resolution
+ // ------------------------------------------------------------------ //
+ // crust_mantle
+ // ------------------------------------------------------------------ //
+ int NSPEC_CRUST_MANTLE;
+ int NGLOB_CRUST_MANTLE;
+
+ // interpolators
+ realw* d_xix_crust_mantle; realw* d_xiy_crust_mantle; realw* d_xiz_crust_mantle;
+ realw* d_etax_crust_mantle; realw* d_etay_crust_mantle; realw* d_etaz_crust_mantle;
+ realw* d_gammax_crust_mantle; realw* d_gammay_crust_mantle; realw* d_gammaz_crust_mantle;
+
+ // model parameters
+ realw* d_rhostore_crust_mantle;
+ realw* d_kappavstore_crust_mantle; realw* d_muvstore_crust_mantle;
+ realw* d_kappahstore_crust_mantle; realw* d_muhstore_crust_mantle;
+ realw* d_eta_anisostore_crust_mantle;
+ realw* d_rmass_crust_mantle;
+
+ // global indexing
+ int* d_ibool_crust_mantle;
+ int* d_ispec_is_tiso_crust_mantle;
+
+ // mesh locations
+ realw* d_xstore_crust_mantle; realw* d_ystore_crust_mantle; realw* d_zstore_crust_mantle;
+
+ // anisotropic 3D mantle
+ realw* d_c11store_crust_mantle;
+ realw* d_c12store_crust_mantle;
+ realw* d_c13store_crust_mantle;
+ realw* d_c14store_crust_mantle;
+ realw* d_c15store_crust_mantle;
+ realw* d_c16store_crust_mantle;
+ realw* d_c22store_crust_mantle;
+ realw* d_c23store_crust_mantle;
+ realw* d_c24store_crust_mantle;
+ realw* d_c25store_crust_mantle;
+ realw* d_c26store_crust_mantle;
+ realw* d_c33store_crust_mantle;
+ realw* d_c34store_crust_mantle;
+ realw* d_c35store_crust_mantle;
+ realw* d_c36store_crust_mantle;
+ realw* d_c44store_crust_mantle;
+ realw* d_c45store_crust_mantle;
+ realw* d_c46store_crust_mantle;
+ realw* d_c55store_crust_mantle;
+ realw* d_c56store_crust_mantle;
+ realw* d_c66store_crust_mantle;
+
+ // wavefields
+ // displacement, velocity, acceleration
+ realw* d_displ_crust_mantle; realw* d_veloc_crust_mantle; realw* d_accel_crust_mantle;
+ // backward/reconstructed elastic wavefield
+ realw* d_b_displ_crust_mantle; realw* d_b_veloc_crust_mantle; realw* d_b_accel_crust_mantle;
+
+ // attenuation
+ realw* d_R_xx_crust_mantle;
+ realw* d_R_yy_crust_mantle;
+ realw* d_R_xy_crust_mantle;
+ realw* d_R_xz_crust_mantle;
+ realw* d_R_yz_crust_mantle;
+
+ realw* d_b_R_xx_crust_mantle;
+ realw* d_b_R_yy_crust_mantle;
+ realw* d_b_R_xy_crust_mantle;
+ realw* d_b_R_xz_crust_mantle;
+ realw* d_b_R_yz_crust_mantle;
+
+ realw* d_factor_common_crust_mantle;
+ realw* d_one_minus_sum_beta_crust_mantle;
+
+ realw* d_epsilondev_xx_crust_mantle;
+ realw* d_epsilondev_yy_crust_mantle;
+ realw* d_epsilondev_xy_crust_mantle;
+ realw* d_epsilondev_xz_crust_mantle;
+ realw* d_epsilondev_yz_crust_mantle;
+
+ realw* d_b_epsilondev_xx_crust_mantle;
+ realw* d_b_epsilondev_yy_crust_mantle;
+ realw* d_b_epsilondev_xy_crust_mantle;
+ realw* d_b_epsilondev_xz_crust_mantle;
+ realw* d_b_epsilondev_yz_crust_mantle;
+
+ realw* d_eps_trace_over_3_crust_mantle;
+ realw* d_b_eps_trace_over_3_crust_mantle;
+
+ // inner / outer elements
+ int* d_phase_ispec_inner_crust_mantle;
+ int num_phase_ispec_crust_mantle;
+
+ int nspec_outer_crust_mantle;
+ int nspec_inner_crust_mantle;
+
+ int num_colors_inner_crust_mantle;
+ int num_colors_outer_crust_mantle;
+ int* h_num_elem_colors_crust_mantle;
+
+
+ // ------------------------------------------------------------------ //
+ // outer_core
+ // ------------------------------------------------------------------ //
+ int NSPEC_OUTER_CORE;
+ int NGLOB_OUTER_CORE;
+
+ // interpolators
+ realw* d_xix_outer_core; realw* d_xiy_outer_core; realw* d_xiz_outer_core;
+ realw* d_etax_outer_core; realw* d_etay_outer_core; realw* d_etaz_outer_core;
+ realw* d_gammax_outer_core; realw* d_gammay_outer_core; realw* d_gammaz_outer_core;
+
+ // model parameters
+ realw* d_rhostore_outer_core; realw* d_kappavstore_outer_core;
+ realw* d_rmass_outer_core;
+
+ // global indexing
+ int* d_ibool_outer_core;
+
+ // mesh locations
+ realw* d_xstore_outer_core; realw* d_ystore_outer_core; realw* d_zstore_outer_core;
+
+ // wavefields
+ // displacement, velocity, acceleration
+ // NOTE(review): in the fluid outer core these hold the scalar potential and
+ // its time derivatives (one component per point; see kernel_3_outer_core)
+ realw* d_displ_outer_core; realw* d_veloc_outer_core; realw* d_accel_outer_core;
+ // backward/reconstructed elastic wavefield
+ realw* d_b_displ_outer_core; realw* d_b_veloc_outer_core; realw* d_b_accel_outer_core;
+
+ // inner / outer elements
+ int* d_phase_ispec_inner_outer_core;
+ int num_phase_ispec_outer_core;
+
+ int nspec_outer_outer_core;
+ int nspec_inner_outer_core;
+
+ int num_colors_inner_outer_core;
+ int num_colors_outer_outer_core;
+ int* h_num_elem_colors_outer_core;
+
+
+ // ------------------------------------------------------------------ //
+ // inner_core
+ // ------------------------------------------------------------------ //
+ int NSPEC_INNER_CORE;
+ int NGLOB_INNER_CORE;
+
+ // interpolators
+ realw* d_xix_inner_core; realw* d_xiy_inner_core; realw* d_xiz_inner_core;
+ realw* d_etax_inner_core; realw* d_etay_inner_core; realw* d_etaz_inner_core;
+ realw* d_gammax_inner_core; realw* d_gammay_inner_core; realw* d_gammaz_inner_core;
+
+ // model parameters
+ realw* d_rhostore_inner_core;
+ realw* d_kappavstore_inner_core; realw* d_muvstore_inner_core;
+ realw* d_rmass_inner_core;
+
+ // global indexing
+ int* d_ibool_inner_core;
+ int* d_idoubling_inner_core;
+
+ // mesh locations
+ realw* d_xstore_inner_core; realw* d_ystore_inner_core; realw* d_zstore_inner_core;
+
+ // anisotropic 3D mantle
+ realw* d_c11store_inner_core;
+ realw* d_c12store_inner_core;
+ realw* d_c13store_inner_core;
+ realw* d_c33store_inner_core;
+ realw* d_c44store_inner_core;
+
+ // wavefields
+ // displacement, velocity, acceleration
+ realw* d_displ_inner_core; realw* d_veloc_inner_core; realw* d_accel_inner_core;
+ // backward/reconstructed elastic wavefield
+ realw* d_b_displ_inner_core; realw* d_b_veloc_inner_core; realw* d_b_accel_inner_core;
+
+ // attenuation
+ realw* d_R_xx_inner_core;
+ realw* d_R_yy_inner_core;
+ realw* d_R_xy_inner_core;
+ realw* d_R_xz_inner_core;
+ realw* d_R_yz_inner_core;
+
+ realw* d_b_R_xx_inner_core;
+ realw* d_b_R_yy_inner_core;
+ realw* d_b_R_xy_inner_core;
+ realw* d_b_R_xz_inner_core;
+ realw* d_b_R_yz_inner_core;
+
+
+ realw* d_factor_common_inner_core;
+ realw* d_one_minus_sum_beta_inner_core;
+
+ realw* d_epsilondev_xx_inner_core;
+ realw* d_epsilondev_yy_inner_core;
+ realw* d_epsilondev_xy_inner_core;
+ realw* d_epsilondev_xz_inner_core;
+ realw* d_epsilondev_yz_inner_core;
+
+ realw* d_b_epsilondev_xx_inner_core;
+ realw* d_b_epsilondev_yy_inner_core;
+ realw* d_b_epsilondev_xy_inner_core;
+ realw* d_b_epsilondev_xz_inner_core;
+ realw* d_b_epsilondev_yz_inner_core;
+
+ realw* d_eps_trace_over_3_inner_core;
+ realw* d_b_eps_trace_over_3_inner_core;
+
+ // inner / outer elements
+ int* d_phase_ispec_inner_inner_core;
+ int num_phase_ispec_inner_core;
+
+ int nspec_outer_inner_core;
+ int nspec_inner_inner_core;
+
+ int num_colors_inner_inner_core;
+ int num_colors_outer_inner_core;
+ int* h_num_elem_colors_inner_core;
+
+ // ------------------------------------------------------------------ //
+ // attenuation
+ // ------------------------------------------------------------------ //
+ realw* d_alphaval;
+ realw* d_betaval;
+ realw* d_gammaval;
+
+ realw* d_b_alphaval;
+ realw* d_b_betaval;
+ realw* d_b_gammaval;
+
+ // ------------------------------------------------------------------ //
+ // GLL points & weights
+ // ------------------------------------------------------------------ //
+
+ // pointers to constant memory arrays
+ realw* d_hprime_xx; realw* d_hprime_yy; realw* d_hprime_zz;
+ realw* d_hprimewgll_xx; realw* d_hprimewgll_yy; realw* d_hprimewgll_zz;
+ realw* d_wgllwgll_xy; realw* d_wgllwgll_xz; realw* d_wgllwgll_yz;
+ realw* d_wgll_cube;
+
+ // simulation type: 1 = forward, 2 = adjoint, 3 = kernel
+ int simulation_type;
+
+ // mesh coloring flag
+ int use_mesh_coloring_gpu;
+
+ // simulation flags
+ // (set from fortran logicals; non-zero means true — see "trivia" note above)
+ int save_forward;
+ int absorbing_conditions;
+ int attenuation;
+ int use_attenuation_mimic;
+ int compute_and_store_strain;
+ int anisotropic_3D_mantle;
+ int gravity;
+ int rotation;
+ int anisotropic_inner_core;
+ int save_boundary_mesh;
+
+ // ------------------------------------------------------------------ //
+ // gravity
+ // ------------------------------------------------------------------ //
+ realw* d_d_ln_density_dr_table; // needed for no gravity case
+ realw* d_minus_rho_g_over_kappa_fluid;
+ realw* d_minus_gravity_table;
+ realw* d_minus_deriv_gravity_table;
+ realw* d_density_table;
+
+ //daniel: TODO old...
+ realw* d_minus_g;
+ realw* d_minus_deriv_gravity;
+
+ // ------------------------------------------------------------------ //
+ // rotation
+ // ------------------------------------------------------------------ //
+ realw d_two_omega_earth;
+ realw d_deltat;
+ realw* d_A_array_rotation; realw* d_B_array_rotation;
+
+ // needed for backward/reconstructed fields (kernel runs)
+ realw d_b_two_omega_earth;
+ realw d_b_deltat;
+ realw* d_b_A_array_rotation; realw* d_b_B_array_rotation;
+
+ // ------------------------------------------------------------------ //
+ // sources
+ // ------------------------------------------------------------------ //
+ int nsources_local;
+ realw* d_sourcearrays;
+ double* d_stf_pre_compute;
+ int* d_islice_selected_source;
+ int* d_ispec_selected_source;
+
+ // ------------------------------------------------------------------ //
+ // receivers
+ // ------------------------------------------------------------------ //
+ int* d_number_receiver_global;
+ int* d_ispec_selected_rec;
+ int* d_islice_selected_rec;
+ int nrec_local;
+ realw* d_station_seismo_field;
+ realw* h_station_seismo_field;
+
+ // adjoint receivers/sources
+ int nadj_rec_local;
+ realw* d_adj_sourcearrays;
+ realw* h_adj_sourcearrays_slice;
+ int* d_pre_computed_irec;
+
+ // ------------------------------------------------------------------ //
+ // assembly
+ // ------------------------------------------------------------------ //
+
+ int num_interfaces_crust_mantle;
+ int max_nibool_interfaces_crust_mantle;
+ int* d_nibool_interfaces_crust_mantle;
+ int* d_ibool_interfaces_crust_mantle;
+ realw* d_send_accel_buffer_crust_mantle;
+
+ int num_interfaces_inner_core;
+ int max_nibool_interfaces_inner_core;
+ int* d_nibool_interfaces_inner_core;
+ int* d_ibool_interfaces_inner_core;
+ realw* d_send_accel_buffer_inner_core;
+
+ int num_interfaces_outer_core;
+ int max_nibool_interfaces_outer_core;
+ int* d_nibool_interfaces_outer_core;
+ int* d_ibool_interfaces_outer_core;
+ realw* d_send_accel_buffer_outer_core;
+
+
+// ------------------------------------------------------------------ //
+//daniel: TODO - former code...
+// NOTE(review): everything below looks carried over from the SPECFEM3D
+// (cartesian) GPU code and may be unused in the GLOBE version — candidates
+// for removal once the port is complete; confirm before relying on them
+
+ // mesh resolution
+ int NSPEC_AB;
+ int NGLOB_AB;
+
+ // interpolators
+ realw* d_xix; realw* d_xiy; realw* d_xiz;
+ realw* d_etax; realw* d_etay; realw* d_etaz;
+ realw* d_gammax; realw* d_gammay; realw* d_gammaz;
+
+ // model parameters
+ realw* d_kappav; realw* d_muv;
+
+ // global indexing
+ int* d_ibool;
+
+ // inner / outer elements
+ int* d_ispec_is_inner;
+
+ // mpi buffers
+ int num_interfaces_ext_mesh;
+ int max_nibool_interfaces_ext_mesh;
+
+ // ------------------------------------------------------------------ //
+ // elastic wavefield parameters
+ // ------------------------------------------------------------------ //
+
+ // displacement, velocity, acceleration
+ realw* d_displ; realw* d_veloc; realw* d_accel;
+ // backward/reconstructed elastic wavefield
+ realw* d_b_displ; realw* d_b_veloc; realw* d_b_accel;
+
+ // elastic elements
+ int* d_ispec_is_elastic;
+
+ // elastic domain parameters
+ int* d_phase_ispec_inner_elastic;
+ int num_phase_ispec_elastic;
+
+ // ------------------------------------------------------------------ //
+ // mesh coloring
+ // ------------------------------------------------------------------ //
+ int* h_num_elem_colors_elastic;
+ int num_colors_outer_elastic,num_colors_inner_elastic;
+ int nspec_elastic;
+
+ realw* d_rmass;
+
+ // ------------------------------------------------------------------ //
+ // mpi buffer
+ // ------------------------------------------------------------------ //
+ realw* d_send_accel_buffer;
+
+ // interfaces
+ int* d_nibool_interfaces_ext_mesh;
+ int* d_ibool_interfaces_ext_mesh;
+
+ // ------------------------------------------------------------------ //
+ //used for absorbing stacey boundaries
+ // ------------------------------------------------------------------ //
+ int d_num_abs_boundary_faces;
+ int* d_abs_boundary_ispec;
+ int* d_abs_boundary_ijk;
+ realw* d_abs_boundary_normal;
+ realw* d_abs_boundary_jacobian2Dw;
+
+ realw* d_b_absorb_field;
+ int d_b_reclen_field;
+
+ realw* d_rho_vp;
+ realw* d_rho_vs;
+
+ // surface elements (to save for noise tomography and acoustic simulations)
+ int* d_free_surface_ispec;
+ int* d_free_surface_ijk;
+ int num_free_surface_faces;
+
+ // surface movie elements to save for noise tomography
+ realw* d_noise_surface_movie;
+
+ // attenuation
+ realw* d_R_xx;
+ realw* d_R_yy;
+ realw* d_R_xy;
+ realw* d_R_xz;
+ realw* d_R_yz;
+
+ realw* d_one_minus_sum_beta;
+ realw* d_factor_common;
+
+ // attenuation & kernel
+ realw* d_epsilondev_xx;
+ realw* d_epsilondev_yy;
+ realw* d_epsilondev_xy;
+ realw* d_epsilondev_xz;
+ realw* d_epsilondev_yz;
+ realw* d_epsilon_trace_over_3;
+
+ // anisotropy
+ realw* d_c11store;
+ realw* d_c12store;
+ realw* d_c13store;
+ realw* d_c14store;
+ realw* d_c15store;
+ realw* d_c16store;
+ realw* d_c22store;
+ realw* d_c23store;
+ realw* d_c24store;
+ realw* d_c25store;
+ realw* d_c26store;
+ realw* d_c33store;
+ realw* d_c34store;
+ realw* d_c35store;
+ realw* d_c36store;
+ realw* d_c44store;
+ realw* d_c45store;
+ realw* d_c46store;
+ realw* d_c55store;
+ realw* d_c56store;
+ realw* d_c66store;
+
+ // noise
+ realw* d_normal_x_noise;
+ realw* d_normal_y_noise;
+ realw* d_normal_z_noise;
+ realw* d_mask_noise;
+ realw* d_free_surface_jacobian2Dw;
+
+ realw* d_noise_sourcearray;
+
+ // attenuation & kernel backward fields
+ realw* d_b_R_xx;
+ realw* d_b_R_yy;
+ realw* d_b_R_xy;
+ realw* d_b_R_xz;
+ realw* d_b_R_yz;
+
+ realw* d_b_epsilondev_xx;
+ realw* d_b_epsilondev_yy;
+ realw* d_b_epsilondev_xy;
+ realw* d_b_epsilondev_xz;
+ realw* d_b_epsilondev_yz;
+ realw* d_b_epsilon_trace_over_3;
+
+ // sensitivity kernels
+ realw* d_rho_kl;
+ realw* d_mu_kl;
+ realw* d_kappa_kl;
+
+ // noise sensitivity kernel
+ realw* d_Sigma_kl;
+
+ // approximative hessian for preconditioning kernels
+ realw* d_hess_el_kl;
+
+ // oceans
+ realw* d_rmass_ocean_load;
+ realw* d_free_surface_normal;
+ int* d_updated_dof_ocean_load;
+
+ // ------------------------------------------------------------------ //
+ // acoustic wavefield
+ // ------------------------------------------------------------------ //
+ // potential and first and second time derivative
+ realw* d_potential_acoustic; realw* d_potential_dot_acoustic; realw* d_potential_dot_dot_acoustic;
+ // backward/reconstructed wavefield
+ realw* d_b_potential_acoustic; realw* d_b_potential_dot_acoustic; realw* d_b_potential_dot_dot_acoustic;
+
+ // acoustic domain parameters
+ int* d_ispec_is_acoustic;
+
+ int* d_phase_ispec_inner_acoustic;
+ int num_phase_ispec_acoustic;
+
+ // mesh coloring
+ int* h_num_elem_colors_acoustic;
+ int num_colors_outer_acoustic,num_colors_inner_acoustic;
+ int nspec_acoustic;
+
+ realw* d_rhostore;
+ realw* d_kappastore;
+ realw* d_rmass_acoustic;
+
+ // mpi buffer
+ realw* d_send_potential_dot_dot_buffer;
+
+ realw* d_b_absorb_potential;
+ int d_b_reclen_potential;
+
+ // for writing seismograms
+ realw* d_station_seismo_potential;
+ realw* h_station_seismo_potential;
+
+ // sensitivity kernels
+ realw* d_rho_ac_kl;
+ realw* d_kappa_ac_kl;
+
+ // approximative hessian for preconditioning kernels
+ realw* d_hess_ac_kl;
+
+ // coupling acoustic-elastic
+ int* d_coupling_ac_el_ispec;
+ int* d_coupling_ac_el_ijk;
+ realw* d_coupling_ac_el_normal;
+ realw* d_coupling_ac_el_jacobian2Dw;
+
+
+} Mesh;
+
+
+#endif
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/noise_tomography_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/noise_tomography_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/noise_tomography_cuda.cu 2012-02-14 15:11:07 UTC (rev 19622)
@@ -0,0 +1,304 @@
+/*
+ !=====================================================================
+ !
+ ! S p e c f e m 3 D V e r s i o n 2 . 0
+ ! ---------------------------------------
+ !
+ ! Main authors: Dimitri Komatitsch and Jeroen Tromp
+ ! Princeton University, USA and University of Pau / CNRS / INRIA
+ ! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+ ! April 2011
+ !
+ ! This program is free software; you can redistribute it and/or modify
+ ! it under the terms of the GNU General Public License as published by
+ ! the Free Software Foundation; either version 2 of the License, or
+ ! (at your option) any later version.
+ !
+ ! This program is distributed in the hope that it will be useful,
+ ! but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ ! GNU General Public License for more details.
+ !
+ ! You should have received a copy of the GNU General Public License along
+ ! with this program; if not, write to the Free Software Foundation, Inc.,
+ ! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ !
+ !=====================================================================
+ */
+
+#include <stdio.h>
+#include <cuda.h>
+#include <cublas.h>
+
+#ifdef WITH_MPI
+#include <mpi.h>
+#endif
+
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(fortranflush,FORTRANFLUSH)(int* rank){
+TRACE("fortranflush");
+
+  // push any buffered output out before announcing this process,
+  // so earlier prints are not interleaved with the message below
+  fflush(stdout);
+  fflush(stderr);
+
+  int proc = *rank;
+  printf("Flushing proc %d!\n",proc);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(fortranprint,FORTRANPRINT)(int* id) {
+TRACE("fortranprint");
+
+  // MPI rank of this process (0 for serial builds)
+  int my_rank = 0;
+#ifdef WITH_MPI
+  MPI_Comm_rank(MPI_COMM_WORLD,&my_rank);
+#endif
+  printf("%d: sends msg_id %d\n",my_rank,*id);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(fortranprintf,FORTRANPRINTF)(realw* val) {
+TRACE("fortranprintf");
+
+  // MPI rank of this process (0 for serial builds)
+  int my_rank = 0;
+#ifdef WITH_MPI
+  MPI_Comm_rank(MPI_COMM_WORLD,&my_rank);
+#endif
+  printf("%d: sends val %e\n",my_rank,*val);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(fortranprintd,FORTRANPRINTD)(double* val) {
+TRACE("fortranprintd");
+
+  // MPI rank of this process (0 for serial builds)
+  int my_rank = 0;
+#ifdef WITH_MPI
+  MPI_Comm_rank(MPI_COMM_WORLD,&my_rank);
+#endif
+  printf("%d: sends val %e\n",my_rank,*val);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// randomize displ for testing
+extern "C"
+void FC_FUNC_(make_displ_rand,MAKE_DISPL_RAND)(long* Mesh_pointer_f,realw* h_displ) {
+TRACE("make_displ_rand");
+
+  // get Mesh struct from fortran integer wrapper
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+
+  // fill the host-side displacement buffer (3 components per global point)
+  // with pseudo-random values, then mirror it onto the device
+  int n = mp->NGLOB_AB*3;
+  for(int i=0;i<n;i++) {
+    h_displ[i] = rand();
+  }
+  cudaMemcpy(mp->d_displ,h_displ,n*sizeof(realw),cudaMemcpyHostToDevice);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Gathers the displacement wavefield at every GLL point of the free surface
+// into the compact per-face array noise_surface_movie.
+// Launch layout: one block per free-surface face (possibly folded into a 2D
+// grid), one thread per GLL point on the face (NGLL2 threads).
+__global__ void transfer_surface_to_host_kernel(int* free_surface_ispec,
+                                                int* free_surface_ijk,
+                                                int num_free_surface_faces,
+                                                int* ibool,
+                                                realw* displ,
+                                                realw* noise_surface_movie) {
+  int igll = threadIdx.x;
+  int iface = blockIdx.x + blockIdx.y*gridDim.x;
+
+  // int id = tx + blockIdx.x*blockDim.x + blockIdx.y*blockDim.x*gridDim.x;
+
+  // grid folding may create extra blocks past the last face; guard them out
+  if(iface < num_free_surface_faces) {
+    int ispec = free_surface_ispec[iface]-1; //-1 for C-based indexing
+
+    // local (i,j,k) indices of this GLL point inside its element
+    int i = free_surface_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)]-1;
+    int j = free_surface_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)]-1;
+    int k = free_surface_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)]-1;
+
+    // global point index (ibool uses 5x5x5 = NGLL3 points per element)
+    int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
+
+    // copies the 3 displacement components of this surface point
+    noise_surface_movie[INDEX3(NDIM,NGLL2,0,igll,iface)] = displ[iglob*3];
+    noise_surface_movie[INDEX3(NDIM,NGLL2,1,igll,iface)] = displ[iglob*3+1];
+    noise_surface_movie[INDEX3(NDIM,NGLL2,2,igll,iface)] = displ[iglob*3+2];
+  }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_surface_to_host,
+              TRANSFER_SURFACE_TO_HOST)(long* Mesh_pointer_f,
+                                        realw* h_noise_surface_movie) {
+TRACE("transfer_surface_to_host");
+
+  // get Mesh struct from fortran integer wrapper
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+
+  // one block per free-surface face; fold the x-dimension into y
+  // whenever it exceeds the hardware grid limit of 65535 blocks
+  int blocks_x = mp->num_free_surface_faces;
+  int blocks_y = 1;
+  while(blocks_x > 65535) {
+    blocks_x = (int) ceil(blocks_x*0.5f);
+    blocks_y *= 2;
+  }
+  dim3 grid(blocks_x,blocks_y,1);
+  dim3 threads(NGLL2,1,1); // one thread per GLL point on a face
+
+  // gathers the surface displacement into the compact device buffer
+  transfer_surface_to_host_kernel<<<grid,threads>>>(mp->d_free_surface_ispec,
+                                                    mp->d_free_surface_ijk,
+                                                    mp->num_free_surface_faces,
+                                                    mp->d_ibool,
+                                                    mp->d_displ,
+                                                    mp->d_noise_surface_movie);
+
+  // copies the gathered surface wavefield back to the host array
+  cudaMemcpy(h_noise_surface_movie,mp->d_noise_surface_movie,
+             3*NGLL2*(mp->num_free_surface_faces)*sizeof(realw),cudaMemcpyDeviceToHost);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  exit_on_cuda_error("transfer_surface_to_host");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Adds the noise surface source onto the acceleration wavefield:
+// for every GLL point of the free surface it projects the stored surface
+// movie onto the point normal (eta), masks it, weights it by the surface
+// jacobian and atomically accumulates the result into accel.
+// Launch layout: one block per face (possibly folded 2D grid), NGLL2 threads.
+__global__ void noise_read_add_surface_movie_cuda_kernel(realw* accel, int* ibool,
+                                                         int* free_surface_ispec,
+                                                         int* free_surface_ijk,
+                                                         int num_free_surface_faces,
+                                                         realw* noise_surface_movie,
+                                                         realw* normal_x_noise,
+                                                         realw* normal_y_noise,
+                                                         realw* normal_z_noise,
+                                                         realw* mask_noise,
+                                                         realw* free_surface_jacobian2Dw) {
+
+  int iface = blockIdx.x + gridDim.x*blockIdx.y; // surface element id
+
+  // when nspec_top > 65535, but mod(nspec_top,2) > 0, we end up with an extra block.
+  if(iface < num_free_surface_faces) {
+    int ispec = free_surface_ispec[iface]-1; // -1 for C-based indexing
+
+    int igll = threadIdx.x;
+
+    // index of this GLL point in the flat per-surface-point noise arrays
+    int ipoin = NGLL2*iface + igll;
+    int i=free_surface_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)]-1;
+    int j=free_surface_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)]-1;
+    int k=free_surface_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)]-1;
+
+    int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
+
+    realw normal_x = normal_x_noise[ipoin];
+    realw normal_y = normal_y_noise[ipoin];
+    realw normal_z = normal_z_noise[ipoin];
+
+    // eta = surface movie vector projected onto the point normal
+    realw eta = (noise_surface_movie[INDEX3(NDIM,NGLL2,0,igll,iface)]*normal_x +
+                 noise_surface_movie[INDEX3(NDIM,NGLL2,1,igll,iface)]*normal_y +
+                 noise_surface_movie[INDEX3(NDIM,NGLL2,2,igll,iface)]*normal_z);
+
+    // error from cuda-memcheck and ddt seems "incorrect", because we
+    // are passing a __constant__ variable pointer around like it was
+    // made using cudaMalloc, which *may* be "incorrect", but produces
+    // correct results.
+
+    // ========= Invalid __global__ read of size
+    // 4 ========= at 0x00000cd8 in
+    // compute_add_sources_cuda.cu:260:noise_read_add_surface_movie_cuda_kernel
+    // ========= by thread (0,0,0) in block (3443,0) ========= Address
+    // 0x203000c8 is out of bounds
+
+    // non atomic version for speed testing -- atomic updates are needed for correctness
+    // accel[3*iglob] += eta*mask_noise[ipoin] * normal_x * wgllwgll_xy[tx] * free_surface_jacobian2Dw[tx + NGLL2*ispec2D];
+    // accel[3*iglob+1] += eta*mask_noise[ipoin] * normal_y * wgllwgll_xy[tx] * free_surface_jacobian2Dw[tx + NGLL2*ispec2D];
+    // accel[3*iglob+2] += eta*mask_noise[ipoin] * normal_z * wgllwgll_xy[tx] * free_surface_jacobian2Dw[tx + NGLL2*ispec2D];
+
+    // Fortran version in SVN -- note deletion of wgllwgll_xy?
+    // accel(1,iglob) = accel(1,iglob) + eta * mask_noise(ipoin) * normal_x_noise(ipoin) &
+    // * free_surface_jacobian2Dw(igll,iface)
+    // accel(2,iglob) = accel(2,iglob) + eta * mask_noise(ipoin) * normal_y_noise(ipoin) &
+    // * free_surface_jacobian2Dw(igll,iface)
+    // accel(3,iglob) = accel(3,iglob) + eta * mask_noise(ipoin) * normal_z_noise(ipoin) &
+    // * free_surface_jacobian2Dw(igll,iface) ! wgllwgll_xy(i,j) * jacobian2D_top(i,j,iface)
+
+    // atomicAdd(&accel[iglob*3] ,eta*mask_noise[ipoin]*normal_x*wgllwgll_xy[tx]*free_surface_jacobian2Dw[igll+NGLL2*iface]);
+    // atomicAdd(&accel[iglob*3+1],eta*mask_noise[ipoin]*normal_y*wgllwgll_xy[tx]*free_surface_jacobian2Dw[igll+NGLL2*iface]);
+    // atomicAdd(&accel[iglob*3+2],eta*mask_noise[ipoin]*normal_z*wgllwgll_xy[tx]*free_surface_jacobian2Dw[igll+NGLL2*iface]);
+
+    // atomic update: neighboring faces share edge/corner global points,
+    // so different blocks may accumulate into the same iglob concurrently
+    atomicAdd(&accel[iglob*3] ,eta*mask_noise[ipoin]*normal_x*free_surface_jacobian2Dw[igll+NGLL2*iface]);
+    atomicAdd(&accel[iglob*3+1],eta*mask_noise[ipoin]*normal_y*free_surface_jacobian2Dw[igll+NGLL2*iface]);
+    atomicAdd(&accel[iglob*3+2],eta*mask_noise[ipoin]*normal_z*free_surface_jacobian2Dw[igll+NGLL2*iface]);
+
+  }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Uploads the surface movie snapshot read from file and adds it as a noise
+// source onto the forward (NOISE_TOMOGRAPHY == 2) or adjoint/backward
+// (NOISE_TOMOGRAPHY == 3) acceleration wavefield. Other values are a no-op
+// apart from the upload. The two former duplicated kernel launches are
+// merged: only the target wavefield pointer differs between the cases.
+extern "C"
+void FC_FUNC_(noise_read_add_surface_movie_cu,
+              NOISE_READ_ADD_SURFACE_MOVIE_CU)(long* Mesh_pointer_f,
+                                               realw* h_noise_surface_movie,
+                                               int* NOISE_TOMOGRAPHYf) {
+TRACE("noise_read_add_surface_movie_cu");
+
+  // EPIK_TRACER("noise_read_add_surface_movie_cu");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+  int NOISE_TOMOGRAPHY = *NOISE_TOMOGRAPHYf;
+
+  // uploads snapshot for this time step onto the device
+  cudaMemcpy(mp->d_noise_surface_movie,h_noise_surface_movie,
+             3*NGLL2*(mp->num_free_surface_faces)*sizeof(realw),cudaMemcpyHostToDevice);
+
+  // one block per free-surface face; fold x-dimension into y past the
+  // hardware limit of 65535 blocks per grid dimension
+  int num_blocks_x = mp->num_free_surface_faces;
+  int num_blocks_y = 1;
+  while(num_blocks_x > 65535) {
+    num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+    num_blocks_y = num_blocks_y*2;
+  }
+  dim3 grid(num_blocks_x,num_blocks_y,1);
+  dim3 threads(NGLL2,1,1);
+
+  // selects the wavefield the surface source acts on
+  realw* d_field = NULL;
+  if(NOISE_TOMOGRAPHY == 2) {
+    d_field = mp->d_accel;   // add surface source to forward field
+  }
+  else if(NOISE_TOMOGRAPHY == 3) {
+    d_field = mp->d_b_accel; // add surface source to adjoint (backward) field
+  }
+
+  if(d_field != NULL) {
+    noise_read_add_surface_movie_cuda_kernel<<<grid,threads>>>(d_field,
+                                                               mp->d_ibool,
+                                                               mp->d_free_surface_ispec,
+                                                               mp->d_free_surface_ijk,
+                                                               mp->num_free_surface_faces,
+                                                               mp->d_noise_surface_movie,
+                                                               mp->d_normal_x_noise,
+                                                               mp->d_normal_y_noise,
+                                                               mp->d_normal_z_noise,
+                                                               mp->d_mask_noise,
+                                                               mp->d_free_surface_jacobian2Dw);
+  }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  exit_on_cuda_error("noise_read_add_surface_movie_cuda_kernel");
+#endif
+}
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_constants_cuda.h
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_constants_cuda.h (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_constants_cuda.h 2012-02-14 15:11:07 UTC (rev 19622)
@@ -0,0 +1,128 @@
+/*
+ !=====================================================================
+ !
+ ! S p e c f e m 3 D V e r s i o n 2 . 0
+ ! ---------------------------------------
+ !
+ ! Main authors: Dimitri Komatitsch and Jeroen Tromp
+ ! Princeton University, USA and University of Pau / CNRS / INRIA
+ ! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+ ! April 2011
+ !
+ ! This program is free software; you can redistribute it and/or modify
+ ! it under the terms of the GNU General Public License as published by
+ ! the Free Software Foundation; either version 2 of the License, or
+ ! (at your option) any later version.
+ !
+ ! This program is distributed in the hope that it will be useful,
+ ! but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ ! GNU General Public License for more details.
+ !
+ ! You should have received a copy of the GNU General Public License along
+ ! with this program; if not, write to the Free Software Foundation, Inc.,
+ ! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ !
+ !=====================================================================
+ */
+
+#ifndef CUDA_HEADER_H
+#define CUDA_HEADER_H
+
+typedef float realw; // type of "working" variables
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// setters for these const arrays (very ugly hack, but will have to do)
+
+// elastic
+void setConst_hprime_xx(realw* array,Mesh* mp);
+void setConst_hprime_yy(realw* array,Mesh* mp);
+void setConst_hprime_zz(realw* array,Mesh* mp);
+
+void setConst_hprimewgll_xx(realw* array,Mesh* mp);
+void setConst_hprimewgll_yy(realw* array,Mesh* mp);
+void setConst_hprimewgll_zz(realw* array,Mesh* mp);
+
+void setConst_wgllwgll_xy(realw* array,Mesh* mp);
+void setConst_wgllwgll_xz(realw* array, Mesh* mp);
+void setConst_wgllwgll_yz(realw* array, Mesh* mp);
+
+void setConst_wgll_cube(realw* array, Mesh* mp);
+
+/* ----------------------------------------------------------------------------------------------- */
+
+/* CUDA specific things from specfem3D_kernels.cu */
+
+#ifdef USE_TEXTURES
+ // declaration of textures
+ texture<realw, 1, cudaReadModeElementType> tex_displ;
+ texture<realw, 1, cudaReadModeElementType> tex_accel;
+
+ texture<realw, 1, cudaReadModeElementType> tex_potential;
+ texture<realw, 1, cudaReadModeElementType> tex_potential_dot_dot;
+
+ // for binding the textures
+
+  // binds d_displ (NDIM*NGLOB floats) to the 1D texture reference tex_displ;
+  // aborts the program on any CUDA binding error
+  void bindTexturesDispl(realw* d_displ)
+  {
+    // one float ("realw") per texel
+    cudaChannelFormatDesc channelDescFloat = cudaCreateChannelDesc<realw>();
+
+    cudaError_t err = cudaBindTexture(NULL,tex_displ, d_displ, channelDescFloat, NDIM*NGLOB*sizeof(realw));
+    if (err != cudaSuccess) {
+      fprintf(stderr, "Error in bindTexturesDispl for displ: %s\n", cudaGetErrorString(err));
+      exit(1);
+    }
+  }
+
+  // binds d_accel (NDIM*NGLOB floats) to the 1D texture reference tex_accel;
+  // aborts the program on any CUDA binding error
+  void bindTexturesAccel(realw* d_accel)
+  {
+    // one float ("realw") per texel
+    cudaChannelFormatDesc channelDescFloat = cudaCreateChannelDesc<realw>();
+
+    cudaError_t err = cudaBindTexture(NULL,tex_accel, d_accel, channelDescFloat, NDIM*NGLOB*sizeof(realw));
+    if (err != cudaSuccess) {
+      fprintf(stderr, "Error in bindTexturesAccel for accel: %s\n", cudaGetErrorString(err));
+      exit(1);
+    }
+  }
+
+  // binds d_potential (NGLOB floats, scalar field) to the 1D texture
+  // reference tex_potential; aborts the program on any CUDA binding error
+  void bindTexturesPotential(realw* d_potential)
+  {
+    // one float ("realw") per texel
+    cudaChannelFormatDesc channelDescFloat = cudaCreateChannelDesc<realw>();
+
+    cudaError_t err = cudaBindTexture(NULL,tex_potential, d_potential,
+                                      channelDescFloat, NGLOB*sizeof(realw));
+    if (err != cudaSuccess) {
+      fprintf(stderr, "Error in bindTexturesPotential for potential: %s\n", cudaGetErrorString(err));
+      exit(1);
+    }
+  }
+
+  // binds d_potential_dot_dot (NGLOB floats, scalar field) to the 1D texture
+  // reference tex_potential_dot_dot; aborts the program on any CUDA binding error
+  void bindTexturesPotential_dot_dot(realw* d_potential_dot_dot)
+  {
+    // one float ("realw") per texel
+    cudaChannelFormatDesc channelDescFloat = cudaCreateChannelDesc<realw>();
+
+    cudaError_t err = cudaBindTexture(NULL,tex_potential_dot_dot, d_potential_dot_dot,
+                                      channelDescFloat, NGLOB*sizeof(realw));
+    if (err != cudaSuccess) {
+      fprintf(stderr, "Error in bindTexturesPotential_dot_dot for potential_dot_dot: %s\n", cudaGetErrorString(err));
+      exit(1);
+    }
+  }
+
+#endif // USE_TEXTURES
+
+
+#endif //CUDA_HEADER_H
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu 2012-02-14 15:11:07 UTC (rev 19622)
@@ -0,0 +1,2755 @@
+/*
+ !=====================================================================
+ !
+ ! S p e c f e m 3 D V e r s i o n 2 . 0
+ ! ---------------------------------------
+ !
+ ! Main authors: Dimitri Komatitsch and Jeroen Tromp
+ ! Princeton University, USA and University of Pau / CNRS / INRIA
+ ! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+ ! April 2011
+ !
+ ! This program is free software; you can redistribute it and/or modify
+ ! it under the terms of the GNU General Public License as published by
+ ! the Free Software Foundation; either version 2 of the License, or
+ ! (at your option) any later version.
+ !
+ ! This program is distributed in the hope that it will be useful,
+ ! but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ ! GNU General Public License for more details.
+ !
+ ! You should have received a copy of the GNU General Public License along
+ ! with this program; if not, write to the Free Software Foundation, Inc.,
+ ! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ !
+ !=====================================================================
+ */
+
+#include <stdio.h>
+#include <cuda.h>
+#include <cublas.h>
+
+#ifdef WITH_MPI
+#include <mpi.h>
+#endif
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+#include "prepare_constants_cuda.h"
+
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// GPU preparation
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Initializes the CUDA device for this MPI process:
+// - initializes the driver API and checks compute capability (>= 1.3)
+// - distributes processes round-robin across the devices of a node
+// - writes a per-process device-info file into OUTPUT_FILES/
+// Returns the number of CUDA devices to fortran in ncuda_devices.
+extern "C"
+void FC_FUNC_(prepare_cuda_device,
+              PREPARE_CUDA_DEVICE)(int* myrank_f,int* ncuda_devices) {
+  TRACE("prepare_cuda_device");
+
+  // Gets rank number of MPI process
+  int myrank = *myrank_f;
+
+  // cuda initialization (needs -lcuda library)
+  // note: cuInit initializes the driver API.
+  //       it is needed for any following CUDA driver API function call (format cuFUNCTION(..) )
+  //       however, for the CUDA runtime API functions (format cudaFUNCTION(..) )
+  //       the initialization is implicit, thus cuInit() here would not be needed...
+  CUresult status = cuInit(0);
+  if ( CUDA_SUCCESS != status ) exit_on_error("CUDA driver API device initialization failed\n");
+
+  // returns a handle to the first cuda compute device
+  CUdevice dev;
+  status = cuDeviceGet(&dev, 0);
+  if ( CUDA_SUCCESS != status ) exit_on_error("CUDA device not found\n");
+
+  // gets device properties
+  int major,minor;
+  status = cuDeviceComputeCapability(&major,&minor,dev);
+  if ( CUDA_SUCCESS != status ) exit_on_error("CUDA device information not found\n");
+
+  // make sure that the device has compute capability >= 1.3
+  // (double precision and, for the kernels here, atomics require >= 1.3)
+  if (major < 1){
+    fprintf(stderr,"Compute capability major number should be at least 1, got: %d \nexiting...\n",major);
+    exit_on_error("CUDA Compute capability major number should be at least 1\n");
+  }
+  if (major == 1 && minor < 3){
+    fprintf(stderr,"Compute capability should be at least 1.3, got: %d.%d \nexiting...\n",major,minor);
+    exit_on_error("CUDA Compute capability major number should be at least 1.3\n");
+  }
+
+  // note: from here on we use the runtime API  ...
+  // Gets number of GPU devices
+  int device_count = 0;
+  cudaGetDeviceCount(&device_count);
+  exit_on_cuda_error("CUDA runtime cudaGetDeviceCount: check if driver and runtime libraries work together\nexiting...\n");
+
+  // returns device count to fortran
+  if (device_count == 0) exit_on_error("CUDA runtime error: there is no device supporting CUDA\n");
+  *ncuda_devices = device_count;
+
+
+  // Sets the active device
+  if(device_count > 1) {
+    // generalized for more GPUs per node
+    // note: without previous context release, cudaSetDevice will complain with the cuda error
+    //         "setting the device when a process is active is not allowed"
+    // releases previous contexts
+    cudaThreadExit();
+
+    //printf("rank %d: cuda device count = %d sets device = %d \n",myrank,device_count,myrank % device_count);
+    //MPI_Barrier(MPI_COMM_WORLD);
+
+    // sets active device: round-robin assignment of MPI ranks to devices
+    cudaSetDevice( myrank % device_count );
+    exit_on_cuda_error("cudaSetDevice");
+  }
+
+  // returns a handle to the active device
+  int device;
+  cudaGetDevice(&device);
+
+  // get device properties
+  struct cudaDeviceProp deviceProp;
+  cudaGetDeviceProperties(&deviceProp,device);
+
+  // exit if the machine has no CUDA-enabled device
+  // (the runtime reports an "emulation device" as 9999.9999)
+  if (deviceProp.major == 9999 && deviceProp.minor == 9999){
+    fprintf(stderr,"No CUDA-enabled device found, exiting...\n\n");
+    exit_on_error("CUDA runtime error: there is no CUDA-enabled device found\n");
+  }
+
+  // outputs device infos to file (appended; one file per MPI rank)
+  char filename[BUFSIZ];
+  FILE* fp;
+  sprintf(filename,"OUTPUT_FILES/gpu_device_info_proc_%06d.txt",myrank);
+  fp = fopen(filename,"a+");
+  if (fp != NULL){
+    // display device properties
+    fprintf(fp,"Device Name = %s\n",deviceProp.name);
+    fprintf(fp,"multiProcessorCount: %d\n",deviceProp.multiProcessorCount);
+    fprintf(fp,"totalGlobalMem (in MB): %f\n",(unsigned long) deviceProp.totalGlobalMem / (1024.f * 1024.f));
+    fprintf(fp,"totalGlobalMem (in GB): %f\n",(unsigned long) deviceProp.totalGlobalMem / (1024.f * 1024.f * 1024.f));
+    fprintf(fp,"sharedMemPerBlock (in bytes): %lu\n",(unsigned long) deviceProp.sharedMemPerBlock);
+    fprintf(fp,"Maximum number of threads per block: %d\n",deviceProp.maxThreadsPerBlock);
+    fprintf(fp,"Maximum size of each dimension of a block: %d x %d x %d\n",
+            deviceProp.maxThreadsDim[0],deviceProp.maxThreadsDim[1],deviceProp.maxThreadsDim[2]);
+    fprintf(fp,"Maximum sizes of each dimension of a grid: %d x %d x %d\n",
+            deviceProp.maxGridSize[0],deviceProp.maxGridSize[1],deviceProp.maxGridSize[2]);
+    fprintf(fp,"Compute capability of the device = %d.%d\n", deviceProp.major, deviceProp.minor);
+    if(deviceProp.canMapHostMemory){
+      fprintf(fp,"canMapHostMemory: TRUE\n");
+    }else{
+      fprintf(fp,"canMapHostMemory: FALSE\n");
+    }
+    if(deviceProp.deviceOverlap){
+      fprintf(fp,"deviceOverlap: TRUE\n");
+    }else{
+      fprintf(fp,"deviceOverlap: FALSE\n");
+    }
+
+    // make sure that the device has compute capability >= 1.3
+    //if (deviceProp.major < 1){
+    //  fprintf(stderr,"Compute capability major number should be at least 1, exiting...\n\n");
+    //  exit_on_error("CUDA Compute capability major number should be at least 1");
+    //}
+    //if (deviceProp.major == 1 && deviceProp.minor < 3){
+    //  fprintf(stderr,"Compute capability should be at least 1.3, exiting...\n");
+    //  exit_on_error("CUDA Compute capability major number should be at least 1.3");
+    //}
+
+    // outputs initial memory infos via cudaMemGetInfo()
+    double free_db,used_db,total_db;
+    get_free_memory(&free_db,&used_db,&total_db);
+    fprintf(fp,"%d: GPU memory usage: used = %f MB, free = %f MB, total = %f MB\n", myrank,
+            used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
+
+    fclose(fp);
+  }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// SIMULATION constants
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Allocates the Mesh container on the host, copies simulation flags and
+// constant GLL arrays to the device, and uploads source/receiver tables.
+// The address of the allocated Mesh struct is returned to fortran through
+// Mesh_pointer (fortran keeps it as an opaque integer handle).
+extern "C"
+void FC_FUNC_(prepare_constants_device,
+              PREPARE_CONSTANTS_DEVICE)(long* Mesh_pointer,
+                                        int* h_NGLLX,
+                                        realw* h_hprime_xx,realw* h_hprime_yy,realw* h_hprime_zz,
+                                        realw* h_hprimewgll_xx,realw* h_hprimewgll_yy,realw* h_hprimewgll_zz,
+                                        realw* h_wgllwgll_xy,realw* h_wgllwgll_xz,realw* h_wgllwgll_yz,
+                                        int* NSOURCES,int* nsources_local,
+                                        realw* h_sourcearrays,
+                                        int* h_islice_selected_source,
+                                        int* h_ispec_selected_source,
+                                        int* h_number_receiver_global,
+                                        int* h_ispec_selected_rec,
+                                        int* nrec,int* nrec_local,
+                                        int* NSPEC_CRUST_MANTLE, int* NGLOB_CRUST_MANTLE,
+                                        int* NSPEC_OUTER_CORE, int* NGLOB_OUTER_CORE,
+                                        int* NSPEC_INNER_CORE, int* NGLOB_INNER_CORE,
+                                        int* SIMULATION_TYPE,
+                                        int* SAVE_FORWARD_f,
+                                        int* ABSORBING_CONDITIONS_f,
+                                        int* GRAVITY_f,
+                                        int* ROTATION_f,
+                                        int* ATTENUATION_f,
+                                        int* USE_ATTENUATION_MIMIC_f,
+                                        int* COMPUTE_AND_STORE_STRAIN_f,
+                                        int* ANISOTROPIC_3D_MANTLE_f,
+                                        int* ANISOTROPIC_INNER_CORE_f,
+                                        int* SAVE_BOUNDARY_MESH_f,
+                                        int* USE_MESH_COLORING_GPU_f) {
+
+TRACE("prepare_constants_device");
+
+  // allocates mesh parameter structure
+  Mesh* mp = (Mesh*) malloc( sizeof(Mesh) );
+  if (mp == NULL) exit_on_error("error allocating mesh pointer");
+  *Mesh_pointer = (long)mp;
+
+  // checks if NGLLX == 5
+  // (the CUDA kernels are compiled with hard-wired 5x5x5 GLL points)
+  if( *h_NGLLX != NGLLX ){
+    exit_on_error("NGLLX must be 5 for CUDA devices");
+  }
+
+  // sets constant arrays (copied into __constant__ device memory)
+  setConst_hprime_xx(h_hprime_xx,mp);
+  setConst_hprime_yy(h_hprime_yy,mp);
+  setConst_hprime_zz(h_hprime_zz,mp);
+  setConst_hprimewgll_xx(h_hprimewgll_xx,mp);
+  setConst_hprimewgll_yy(h_hprimewgll_yy,mp);
+  setConst_hprimewgll_zz(h_hprimewgll_zz,mp);
+  setConst_wgllwgll_xy(h_wgllwgll_xy,mp);
+  setConst_wgllwgll_xz(h_wgllwgll_xz,mp);
+  setConst_wgllwgll_yz(h_wgllwgll_yz,mp);
+
+  // sets global parameters (spectral-element / global point counts per region)
+  mp->NSPEC_CRUST_MANTLE = *NSPEC_CRUST_MANTLE;
+  mp->NGLOB_CRUST_MANTLE = *NGLOB_CRUST_MANTLE;
+  mp->NSPEC_OUTER_CORE = *NSPEC_OUTER_CORE;
+  mp->NGLOB_OUTER_CORE = *NGLOB_OUTER_CORE;
+  mp->NSPEC_INNER_CORE = *NSPEC_INNER_CORE;
+  mp->NGLOB_INNER_CORE = *NGLOB_INNER_CORE;
+
+  // simulation type
+  mp->simulation_type = *SIMULATION_TYPE;
+
+  // simulation flags initialization
+  mp->save_forward = *SAVE_FORWARD_f;
+  mp->absorbing_conditions = *ABSORBING_CONDITIONS_f;
+  mp->gravity = *GRAVITY_f;
+  mp->rotation = *ROTATION_f;
+  mp->attenuation = *ATTENUATION_f;
+  mp->use_attenuation_mimic = *USE_ATTENUATION_MIMIC_f;
+  mp->compute_and_store_strain = *COMPUTE_AND_STORE_STRAIN_f;
+  mp->anisotropic_3D_mantle = *ANISOTROPIC_3D_MANTLE_f;
+  mp->anisotropic_inner_core = *ANISOTROPIC_INNER_CORE_f;
+  mp->save_boundary_mesh = *SAVE_BOUNDARY_MESH_f;
+
+
+  // mesh coloring flag
+#ifdef USE_MESH_COLORING_GPU
+  mp->use_mesh_coloring_gpu = 1;
+  if( ! *USE_MESH_COLORING_GPU_f ) exit_on_error("error with USE_MESH_COLORING_GPU constant; please re-compile\n");
+#else
+  // mesh coloring
+  // note: this here passes the coloring as an option to the kernel routines
+  //       the performance seems to be the same if one uses the pre-processing directives above or not
+  mp->use_mesh_coloring_gpu = *USE_MESH_COLORING_GPU_f;
+#endif
+
+  // sources (each source array holds 3 components on NGLL3 points)
+  mp->nsources_local = *nsources_local;
+  if( mp->simulation_type == 1  || mp->simulation_type == 3 ){
+    // not needed in case of pure adjoint simulations (SIMULATION_TYPE == 2)
+    print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_sourcearrays,
+                                       sizeof(realw)* *NSOURCES*3*NGLL3),1301);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_sourcearrays, h_sourcearrays,
+                                       sizeof(realw)* *NSOURCES*3*NGLL3,cudaMemcpyHostToDevice),1302);
+
+    // buffer for the pre-computed source time function values (double precision)
+    print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_stf_pre_compute,
+                                       *NSOURCES*sizeof(double)),1303);
+  }
+
+  print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_islice_selected_source,
+                                     sizeof(int) * *NSOURCES),1401);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_islice_selected_source, h_islice_selected_source,
+                                     sizeof(int)* *NSOURCES,cudaMemcpyHostToDevice),1402);
+
+  print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_ispec_selected_source,
+                                     sizeof(int)* *NSOURCES),1403);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_selected_source, h_ispec_selected_source,
+                                     sizeof(int)* *NSOURCES,cudaMemcpyHostToDevice),1404);
+
+
+  // receiver stations
+  // note that:   size(number_receiver_global) = nrec_local
+  //                   size(ispec_selected_rec) = nrec
+  // number of receiver located in this partition
+  mp->nrec_local = *nrec_local;
+  if( mp->nrec_local > 0 ){
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_number_receiver_global),mp->nrec_local*sizeof(int)),1);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_number_receiver_global,h_number_receiver_global,
+                                     mp->nrec_local*sizeof(int),cudaMemcpyHostToDevice),1512);
+  }
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ispec_selected_rec),(*nrec)*sizeof(int)),1513);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_selected_rec,h_ispec_selected_rec,
+                                     (*nrec)*sizeof(int),cudaMemcpyHostToDevice),1514);
+
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  exit_on_cuda_error("prepare_constants_device");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// ROTATION simulations
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Allocates and uploads the Coriolis/rotation arrays (outer core only):
+// scalars two_omega_earth/deltat plus the A/B rotation memory arrays, and
+// their backward/reconstructed counterparts for SIMULATION_TYPE == 3.
+// Must only be called when the ROTATION flag was set in prepare_constants_device.
+extern "C"
+void FC_FUNC_(prepare_fields_rotation_device,
+              PREPARE_FIELDS_ROTATION_DEVICE)(long* Mesh_pointer_f,
+                                              realw* two_omega_earth,
+                                              realw* deltat,
+                                              realw* A_array_rotation,
+                                              realw* B_array_rotation,
+                                              realw* b_two_omega_earth,
+                                              realw* b_deltat,
+                                              realw* b_A_array_rotation,
+                                              realw* b_B_array_rotation,
+                                              int* NSPEC_OUTER_CORE_ROTATION
+                                              ) {
+
+  TRACE("prepare_fields_rotation_device");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+
+  // arrays only needed when rotation is required
+  if( ! mp->rotation ){ exit_on_cuda_error("prepare_fields_rotation_device rotation not properly initialized"); }
+
+  // rotation arrays (needed only for outer core region)
+  mp->d_two_omega_earth = *two_omega_earth;
+  mp->d_deltat = *deltat;
+
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_A_array_rotation,
+                                     (*NSPEC_OUTER_CORE_ROTATION)*sizeof(realw)),9000);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_A_array_rotation, A_array_rotation,
+                                     (*NSPEC_OUTER_CORE_ROTATION)*sizeof(realw),cudaMemcpyHostToDevice),9001);
+
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_B_array_rotation,
+                                     (*NSPEC_OUTER_CORE_ROTATION)*sizeof(realw)),9000);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_B_array_rotation, B_array_rotation,
+                                     (*NSPEC_OUTER_CORE_ROTATION)*sizeof(realw),cudaMemcpyHostToDevice),9001);
+
+  // backward/reconstructed fields (kernel simulations only)
+  if( mp->simulation_type == 3 ){
+    mp->d_b_two_omega_earth = *b_two_omega_earth;
+    mp->d_b_deltat = *b_deltat;
+
+    print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_A_array_rotation,
+                                       (*NSPEC_OUTER_CORE_ROTATION)*sizeof(realw)),9000);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_A_array_rotation, b_A_array_rotation,
+                                       (*NSPEC_OUTER_CORE_ROTATION)*sizeof(realw),cudaMemcpyHostToDevice),9001);
+
+    print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_B_array_rotation,
+                                       (*NSPEC_OUTER_CORE_ROTATION)*sizeof(realw)),9000);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_B_array_rotation, b_B_array_rotation,
+                                       (*NSPEC_OUTER_CORE_ROTATION)*sizeof(realw),cudaMemcpyHostToDevice),9001);
+  }
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// GRAVITY simulations
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Allocates and uploads the gravity-related radial tables:
+// - without gravity: only d ln(rho)/dr (needed for the no-gravity fluid potential)
+// - with gravity: GLL weights cube plus the four radial gravity/density tables
+// All tables have NRAD_GRAVITY entries.
+// fix: the second FC_FUNC_ argument must be fully upper-case
+// (was PREPARE_FIELDS_gravity_DEVICE), otherwise the generated symbol is
+// wrongly cased on Fortran compilers with upper-case name mangling and the
+// fortran call fails to link.
+extern "C"
+void FC_FUNC_(prepare_fields_gravity_device,
+              PREPARE_FIELDS_GRAVITY_DEVICE)(long* Mesh_pointer_f,
+                                             realw* d_ln_density_dr_table,
+                                             realw* minus_rho_g_over_kappa_fluid,
+                                             realw* minus_gravity_table,
+                                             realw* minus_deriv_gravity_table,
+                                             realw* density_table,
+                                             realw* h_wgll_cube,
+                                             int* NRAD_GRAVITY
+                                             ) {
+
+  TRACE("prepare_fields_gravity_device");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+
+  if( ! mp->gravity ){
+    // no gravity case
+
+    // d ln(rho)/dr needed for the no gravity fluid potential
+    print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_d_ln_density_dr_table,
+                                       (*NRAD_GRAVITY)*sizeof(realw)),8000);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_d_ln_density_dr_table, d_ln_density_dr_table,
+                                       (*NRAD_GRAVITY)*sizeof(realw),cudaMemcpyHostToDevice),8001);
+
+  }else{
+    // gravity case
+
+    // sets up gll weights cubed
+    setConst_wgll_cube(h_wgll_cube,mp);
+
+    // prepares gravity arrays
+    print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_minus_rho_g_over_kappa_fluid,
+                                       (*NRAD_GRAVITY)*sizeof(realw)),8000);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_minus_rho_g_over_kappa_fluid, minus_rho_g_over_kappa_fluid,
+                                       (*NRAD_GRAVITY)*sizeof(realw),cudaMemcpyHostToDevice),8001);
+
+    print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_minus_gravity_table,
+                                       (*NRAD_GRAVITY)*sizeof(realw)),8000);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_minus_gravity_table, minus_gravity_table,
+                                       (*NRAD_GRAVITY)*sizeof(realw),cudaMemcpyHostToDevice),8001);
+
+    print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_minus_deriv_gravity_table,
+                                       (*NRAD_GRAVITY)*sizeof(realw)),8000);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_minus_deriv_gravity_table, minus_deriv_gravity_table,
+                                       (*NRAD_GRAVITY)*sizeof(realw),cudaMemcpyHostToDevice),8001);
+
+    print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_density_table,
+                                       (*NRAD_GRAVITY)*sizeof(realw)),8000);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_density_table, density_table,
+                                       (*NRAD_GRAVITY)*sizeof(realw),cudaMemcpyHostToDevice),8001);
+  }
+}
+
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// ATTENUATION simulations
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(prepare_fields_attenuat_device,
+ PREPARE_FIELDS_ATTENUAT_DEVICE)(long* Mesh_pointer_f,
+ realw* R_xx_crust_mantle,
+ realw* R_yy_crust_mantle,
+ realw* R_xy_crust_mantle,
+ realw* R_xz_crust_mantle,
+ realw* R_yz_crust_mantle,
+ realw* factor_common_crust_mantle,
+ realw* one_minus_sum_beta_crust_mantle,
+ realw* R_xx_inner_core,
+ realw* R_yy_inner_core,
+ realw* R_xy_inner_core,
+ realw* R_xz_inner_core,
+ realw* R_yz_inner_core,
+ realw* factor_common_inner_core,
+ realw* one_minus_sum_beta_inner_core,
+ realw* alphaval,realw* betaval,realw* gammaval,
+ realw* b_alphaval,realw* b_betaval,realw* b_gammaval
+ ) {
+
+ TRACE("prepare_fields_attenuat_device");
+ int R_size1,R_size2,R_size3;
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+
+ // checks flag
+ if( ! mp->attenuation ){ exit_on_cuda_error("prepare_fields_attenuat_device attenuation not properly initialized"); }
+
+ // crust_mantle
+ R_size1 = N_SLS*NGLL3*mp->NSPEC_CRUST_MANTLE;
+ R_size2 = NGLL3*mp->NSPEC_CRUST_MANTLE;
+ R_size3 = N_SLS*NGLL3*mp->NSPEC_CRUST_MANTLE;
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_one_minus_sum_beta_crust_mantle,
+ R_size2*sizeof(realw)),4430);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_one_minus_sum_beta_crust_mantle,one_minus_sum_beta_crust_mantle,
+ R_size2*sizeof(realw),cudaMemcpyHostToDevice),4431);
+
+ if( ! mp->use_attenuation_mimic ){
+
+ //daniel: TODO - re-add the ones below, for now too little memory...
+ printf("skipping R_memory arrays, memory too small...\n");
+ if( 0 == 1 ){
+
+ // common factor
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_factor_common_crust_mantle,
+ R_size3*sizeof(realw)),4432);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_factor_common_crust_mantle,factor_common_crust_mantle,
+ R_size3*sizeof(realw),cudaMemcpyHostToDevice),4433);
+
+ // memory variables
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_R_xx_crust_mantle,
+ R_size1*sizeof(realw)),4401);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_R_yy_crust_mantle,
+ R_size1*sizeof(realw)),4401);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_R_xy_crust_mantle,
+ R_size1*sizeof(realw)),4401);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_R_xz_crust_mantle,
+ R_size1*sizeof(realw)),4401);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_R_yz_crust_mantle,
+ R_size1*sizeof(realw)),4401);
+
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_R_xx_crust_mantle,R_xx_crust_mantle,
+ R_size1*sizeof(realw),cudaMemcpyHostToDevice),4800);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_R_yy_crust_mantle,R_yy_crust_mantle,
+ R_size1*sizeof(realw),cudaMemcpyHostToDevice),4800);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_R_xy_crust_mantle,R_xy_crust_mantle,
+ R_size1*sizeof(realw),cudaMemcpyHostToDevice),4800);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_R_xz_crust_mantle,R_xz_crust_mantle,
+ R_size1*sizeof(realw),cudaMemcpyHostToDevice),4800);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_R_yz_crust_mantle,R_yz_crust_mantle,
+ R_size1*sizeof(realw),cudaMemcpyHostToDevice),4800);
+
+
+ }
+ }
+
+ // inner_core
+ R_size1 = 5*N_SLS*NGLL3*mp->NSPEC_INNER_CORE;
+ R_size2 = NGLL3*mp->NSPEC_INNER_CORE;
+ R_size3 = N_SLS*NGLL3*mp->NSPEC_INNER_CORE;
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_one_minus_sum_beta_inner_core,
+ R_size2*sizeof(realw)),4430);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_one_minus_sum_beta_inner_core,one_minus_sum_beta_inner_core,
+ R_size2*sizeof(realw),cudaMemcpyHostToDevice),4431);
+
+ if( ! mp->use_attenuation_mimic ){
+ // common factor
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_factor_common_inner_core,
+ R_size3*sizeof(realw)),4432);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_factor_common_inner_core,factor_common_inner_core,
+ R_size3*sizeof(realw),cudaMemcpyHostToDevice),4433);
+
+ // memory variables
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_R_xx_inner_core,
+ R_size1*sizeof(realw)),4401);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_R_yy_inner_core,
+ R_size1*sizeof(realw)),4401);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_R_xy_inner_core,
+ R_size1*sizeof(realw)),4401);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_R_xz_inner_core,
+ R_size1*sizeof(realw)),4401);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_R_yz_inner_core,
+ R_size1*sizeof(realw)),4401);
+
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_R_xx_inner_core,R_xx_inner_core,
+ R_size1*sizeof(realw),cudaMemcpyHostToDevice),4402);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_R_yy_inner_core,R_yy_inner_core,
+ R_size1*sizeof(realw),cudaMemcpyHostToDevice),4402);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_R_xy_inner_core,R_xy_inner_core,
+ R_size1*sizeof(realw),cudaMemcpyHostToDevice),4402);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_R_xz_inner_core,R_xz_inner_core,
+ R_size1*sizeof(realw),cudaMemcpyHostToDevice),4402);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_R_yz_inner_core,R_yz_inner_core,
+ R_size1*sizeof(realw),cudaMemcpyHostToDevice),4402);
+ }
+
+ // alpha,beta,gamma factors
+ print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_alphaval),
+ N_SLS*sizeof(realw)),4434);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_alphaval ,alphaval,
+ N_SLS*sizeof(realw),cudaMemcpyHostToDevice),4435);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_betaval),
+ N_SLS*sizeof(realw)),4436);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_betaval ,betaval,
+ N_SLS*sizeof(realw),cudaMemcpyHostToDevice),4437);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_gammaval),
+ N_SLS*sizeof(realw)),4438);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_gammaval ,gammaval,
+ N_SLS*sizeof(realw),cudaMemcpyHostToDevice),4439);
+
+ if( mp->simulation_type == 3 ){
+ // alpha,beta,gamma factors for backward fields
+ print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_b_alphaval),
+ N_SLS*sizeof(realw)),5434);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_alphaval ,b_alphaval,
+ N_SLS*sizeof(realw),cudaMemcpyHostToDevice),5435);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_b_betaval),
+ N_SLS*sizeof(realw)),5436);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_betaval ,b_betaval,
+ N_SLS*sizeof(realw),cudaMemcpyHostToDevice),5437);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &(mp->d_b_gammaval),
+ N_SLS*sizeof(realw)),5438);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_gammaval ,b_gammaval,
+ N_SLS*sizeof(realw),cudaMemcpyHostToDevice),5439);
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// STRAIN simulations
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(prepare_fields_strain_device,
+ PREPARE_FIELDS_STRAIN_DEVICE)(long* Mesh_pointer_f,
+ realw* epsilondev_xx_crust_mantle,
+ realw* epsilondev_yy_crust_mantle,
+ realw* epsilondev_xy_crust_mantle,
+ realw* epsilondev_xz_crust_mantle,
+ realw* epsilondev_yz_crust_mantle,
+ realw* b_epsilondev_xx_crust_mantle,
+ realw* b_epsilondev_yy_crust_mantle,
+ realw* b_epsilondev_xy_crust_mantle,
+ realw* b_epsilondev_xz_crust_mantle,
+ realw* b_epsilondev_yz_crust_mantle,
+ realw* eps_trace_over_3_crust_mantle,
+ realw* b_eps_trace_over_3_crust_mantle,
+ realw* epsilondev_xx_inner_core,
+ realw* epsilondev_yy_inner_core,
+ realw* epsilondev_xy_inner_core,
+ realw* epsilondev_xz_inner_core,
+ realw* epsilondev_yz_inner_core,
+ realw* b_epsilondev_xx_inner_core,
+ realw* b_epsilondev_yy_inner_core,
+ realw* b_epsilondev_xy_inner_core,
+ realw* b_epsilondev_xz_inner_core,
+ realw* b_epsilondev_yz_inner_core,
+ realw* eps_trace_over_3_inner_core,
+ realw* b_eps_trace_over_3_inner_core
+ ) {
+
+ TRACE("prepare_fields_strain_device");
+ int R_size,R_size2;
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+
+ // checks flag
+ if( ! mp->compute_and_store_strain ){ exit_on_cuda_error("prepare_fields_strain_device strain not properly initialized"); }
+
+ // crust_mantle
+ R_size = NGLL3*mp->NSPEC_CRUST_MANTLE;
+ R_size2 = NGLL3*mp->NSPEC_CRUST_MANTLE;
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_epsilondev_xx_crust_mantle,
+ R_size*sizeof(realw)),4432);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_epsilondev_yy_crust_mantle,
+ R_size*sizeof(realw)),4432);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_epsilondev_xy_crust_mantle,
+ R_size*sizeof(realw)),4432);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_epsilondev_xz_crust_mantle,
+ R_size*sizeof(realw)),4432);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_epsilondev_yz_crust_mantle,
+ R_size*sizeof(realw)),4432);
+
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_xx_crust_mantle,epsilondev_xx_crust_mantle,
+ R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_yy_crust_mantle,epsilondev_yy_crust_mantle,
+ R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_xy_crust_mantle,epsilondev_xy_crust_mantle,
+ R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_xz_crust_mantle,epsilondev_xz_crust_mantle,
+ R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_yz_crust_mantle,epsilondev_yz_crust_mantle,
+ R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_eps_trace_over_3_crust_mantle,
+ R_size2*sizeof(realw)),4401);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_eps_trace_over_3_crust_mantle,eps_trace_over_3_crust_mantle,
+ R_size2*sizeof(realw),cudaMemcpyHostToDevice),4402);
+ // backward/reconstructed fields
+ if( mp->simulation_type == 3 ){
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_epsilondev_xx_crust_mantle,
+ R_size*sizeof(realw)),4432);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_epsilondev_yy_crust_mantle,
+ R_size*sizeof(realw)),4432);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_epsilondev_xy_crust_mantle,
+ R_size*sizeof(realw)),4432);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_epsilondev_xz_crust_mantle,
+ R_size*sizeof(realw)),4432);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_epsilondev_yz_crust_mantle,
+ R_size*sizeof(realw)),4432);
+
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xx_crust_mantle,b_epsilondev_xx_crust_mantle,
+ R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yy_crust_mantle,b_epsilondev_yy_crust_mantle,
+ R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xy_crust_mantle,b_epsilondev_xy_crust_mantle,
+ R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xz_crust_mantle,b_epsilondev_xz_crust_mantle,
+ R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yz_crust_mantle,b_epsilondev_yz_crust_mantle,
+ R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
+
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_eps_trace_over_3_crust_mantle,
+ R_size2*sizeof(realw)),4401);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_eps_trace_over_3_crust_mantle,b_eps_trace_over_3_crust_mantle,
+ R_size2*sizeof(realw),cudaMemcpyHostToDevice),4402);
+ }
+
+ // inner_core
+ R_size = NGLL3*mp->NSPEC_INNER_CORE;
+ R_size2 = NGLL3*mp->NSPEC_INNER_CORE;
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_epsilondev_xx_inner_core,
+ R_size*sizeof(realw)),4432);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_epsilondev_yy_inner_core,
+ R_size*sizeof(realw)),4432);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_epsilondev_xy_inner_core,
+ R_size*sizeof(realw)),4432);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_epsilondev_xz_inner_core,
+ R_size*sizeof(realw)),4432);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_epsilondev_yz_inner_core,
+ R_size*sizeof(realw)),4432);
+
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_xx_inner_core,epsilondev_xx_inner_core,
+ R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_yy_inner_core,epsilondev_yy_inner_core,
+ R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_xy_inner_core,epsilondev_xy_inner_core,
+ R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_xz_inner_core,epsilondev_xz_inner_core,
+ R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_yz_inner_core,epsilondev_yz_inner_core,
+ R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
+
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_eps_trace_over_3_inner_core,
+ R_size2*sizeof(realw)),4401);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_eps_trace_over_3_inner_core,eps_trace_over_3_inner_core,
+ R_size2*sizeof(realw),cudaMemcpyHostToDevice),4402);
+ // backward/reconstructed fields
+ if( mp->simulation_type == 3 ){
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_epsilondev_xx_inner_core,
+ R_size*sizeof(realw)),4432);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_epsilondev_yy_inner_core,
+ R_size*sizeof(realw)),4432);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_epsilondev_xy_inner_core,
+ R_size*sizeof(realw)),4432);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_epsilondev_xz_inner_core,
+ R_size*sizeof(realw)),4432);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_epsilondev_yz_inner_core,
+ R_size*sizeof(realw)),4432);
+
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xx_inner_core,b_epsilondev_xx_inner_core,
+ R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yy_inner_core,b_epsilondev_yy_inner_core,
+ R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xy_inner_core,b_epsilondev_xy_inner_core,
+ R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xz_inner_core,b_epsilondev_xz_inner_core,
+ R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yz_inner_core,b_epsilondev_yz_inner_core,
+ R_size*sizeof(realw),cudaMemcpyHostToDevice),4433);
+
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_eps_trace_over_3_inner_core,
+ R_size2*sizeof(realw)),4401);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_eps_trace_over_3_inner_core,b_eps_trace_over_3_inner_core,
+ R_size2*sizeof(realw),cudaMemcpyHostToDevice),4402);
+ }
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// MPI interfaces
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(prepare_mpi_buffers_device,
+ PREPARE_MPI_BUFFERS_DEVICE)(long* Mesh_pointer_f,
+ int* num_interfaces_crust_mantle,
+ int* max_nibool_interfaces_crust_mantle,
+ int* nibool_interfaces_crust_mantle,
+ int* ibool_interfaces_crust_mantle,
+ int* num_interfaces_inner_core,
+ int* max_nibool_interfaces_inner_core,
+ int* nibool_interfaces_inner_core,
+ int* ibool_interfaces_inner_core,
+ int* num_interfaces_outer_core,
+ int* max_nibool_interfaces_outer_core,
+ int* nibool_interfaces_outer_core,
+ int* ibool_interfaces_outer_core
+ ){
+
+ TRACE("prepare_mpi_buffers_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+
+ // prepares interprocess-edge exchange information
+
+ // crust/mantle mesh
+ mp->num_interfaces_crust_mantle = *num_interfaces_crust_mantle;
+ mp->max_nibool_interfaces_crust_mantle = *max_nibool_interfaces_crust_mantle;
+ if( mp->num_interfaces_crust_mantle > 0 ){
+ // number of ibool entries array
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nibool_interfaces_crust_mantle,
+ (mp->num_interfaces_crust_mantle)*sizeof(int)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_nibool_interfaces_crust_mantle,nibool_interfaces_crust_mantle,
+ (mp->num_interfaces_crust_mantle)*sizeof(int),cudaMemcpyHostToDevice),1202);
+ // ibool entries (iglob indices) values on interface
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibool_interfaces_crust_mantle,
+ (mp->num_interfaces_crust_mantle)*(mp->max_nibool_interfaces_crust_mantle)*sizeof(int)),1203);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle,
+ (mp->num_interfaces_crust_mantle)*(mp->max_nibool_interfaces_crust_mantle)*sizeof(int),
+ cudaMemcpyHostToDevice),1204);
+ // allocates mpi buffer for exchange with cpu
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_send_accel_buffer_crust_mantle),
+ 3*(mp->max_nibool_interfaces_crust_mantle)*(mp->num_interfaces_crust_mantle)*sizeof(realw)),4004);
+ }
+
+ // inner core mesh
+ mp->num_interfaces_inner_core = *num_interfaces_inner_core;
+ mp->max_nibool_interfaces_inner_core = *max_nibool_interfaces_inner_core;
+ if( mp->num_interfaces_inner_core > 0 ){
+ // number of ibool entries array
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nibool_interfaces_inner_core,
+ (mp->num_interfaces_inner_core)*sizeof(int)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_nibool_interfaces_inner_core,nibool_interfaces_inner_core,
+ (mp->num_interfaces_inner_core)*sizeof(int),cudaMemcpyHostToDevice),1202);
+ // ibool entries (iglob indices) values on interface
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibool_interfaces_inner_core,
+ (mp->num_interfaces_inner_core)*(mp->max_nibool_interfaces_inner_core)*sizeof(int)),1203);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ibool_interfaces_inner_core,ibool_interfaces_inner_core,
+ (mp->num_interfaces_inner_core)*(mp->max_nibool_interfaces_inner_core)*sizeof(int),
+ cudaMemcpyHostToDevice),1204);
+ // allocates mpi buffer for exchange with cpu
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_send_accel_buffer_inner_core),
+ 3*(mp->max_nibool_interfaces_inner_core)*(mp->num_interfaces_inner_core)*sizeof(realw)),4004);
+ }
+
+ // outer core mesh
+ // note: uses only scalar wavefield arrays
+ mp->num_interfaces_outer_core = *num_interfaces_outer_core;
+ mp->max_nibool_interfaces_outer_core = *max_nibool_interfaces_outer_core;
+ if( mp->num_interfaces_outer_core > 0 ){
+ // number of ibool entries array
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nibool_interfaces_outer_core,
+ (mp->num_interfaces_outer_core)*sizeof(int)),1201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_nibool_interfaces_outer_core,nibool_interfaces_outer_core,
+ (mp->num_interfaces_outer_core)*sizeof(int),cudaMemcpyHostToDevice),1202);
+ // ibool entries (iglob indices) values on interface
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibool_interfaces_outer_core,
+ (mp->num_interfaces_outer_core)*(mp->max_nibool_interfaces_outer_core)*sizeof(int)),1203);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ibool_interfaces_outer_core,ibool_interfaces_outer_core,
+ (mp->num_interfaces_outer_core)*(mp->max_nibool_interfaces_outer_core)*sizeof(int),
+ cudaMemcpyHostToDevice),1204);
+ // allocates mpi buffer for exchange with cpu
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_send_accel_buffer_outer_core),
+ (mp->max_nibool_interfaces_outer_core)*(mp->num_interfaces_outer_core)*sizeof(realw)),4004);
+ }
+
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Earth regions
+
+// CRUST / MANTLE
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(prepare_crust_mantle_device,
+ PREPARE_CRUST_MANTLE_DEVICE)(long* Mesh_pointer_f,
+ realw* h_xix, realw* h_xiy, realw* h_xiz,
+ realw* h_etax, realw* h_etay, realw* h_etaz,
+ realw* h_gammax, realw* h_gammay, realw* h_gammaz,
+ realw* h_rho,
+ realw* h_kappav, realw* h_muv,
+ realw* h_kappah, realw* h_muh,
+ realw* h_eta_aniso,
+ realw* h_rmass,
+ int* h_ibool,
+ realw* h_xstore, realw* h_ystore, realw* h_zstore,
+ int* h_ispec_is_tiso,
+ realw *c11store,realw *c12store,realw *c13store,
+ realw *c14store,realw *c15store,realw *c16store,
+ realw *c22store,realw *c23store,realw *c24store,
+ realw *c25store,realw *c26store,realw *c33store,
+ realw *c34store,realw *c35store,realw *c36store,
+ realw *c44store,realw *c45store,realw *c46store,
+ realw *c55store,realw *c56store,realw *c66store,
+ int* num_phase_ispec,
+ int* phase_ispec_inner,
+ int* nspec_outer,
+ int* nspec_inner
+ ) {
+
+ TRACE("prepare_crust_mantle_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+
+ /* Assuming NGLLX=5. Padded is then 128 (5^3+3) */
+ int size_padded = NGLL3_PADDED * (mp->NSPEC_CRUST_MANTLE);
+ int size_glob = mp->NGLOB_CRUST_MANTLE;
+
+ // mesh
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xix_crust_mantle, size_padded*sizeof(realw)),1001);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xiy_crust_mantle, size_padded*sizeof(realw)),1002);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xiz_crust_mantle, size_padded*sizeof(realw)),1003);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etax_crust_mantle, size_padded*sizeof(realw)),1004);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etay_crust_mantle, size_padded*sizeof(realw)),1005);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etaz_crust_mantle, size_padded*sizeof(realw)),1006);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammax_crust_mantle, size_padded*sizeof(realw)),1007);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammay_crust_mantle, size_padded*sizeof(realw)),1008);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammaz_crust_mantle, size_padded*sizeof(realw)),1009);
+ // muv needed in case for attenuation (only Q_mu shear attenuation)
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_muvstore_crust_mantle, size_padded*sizeof(realw)),1011);
+
+ // transfer constant element data with padding
+ for(int i=0;i < mp->NSPEC_CRUST_MANTLE;i++) {
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_xix_crust_mantle + i*NGLL3_PADDED, &h_xix[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1501);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_xiy_crust_mantle+i*NGLL3_PADDED, &h_xiy[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1502);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_xiz_crust_mantle+i*NGLL3_PADDED, &h_xiz[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1503);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_etax_crust_mantle+i*NGLL3_PADDED, &h_etax[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1504);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_etay_crust_mantle+i*NGLL3_PADDED, &h_etay[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1505);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_etaz_crust_mantle+i*NGLL3_PADDED, &h_etaz[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1506);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_gammax_crust_mantle+i*NGLL3_PADDED,&h_gammax[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1507);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_gammay_crust_mantle+i*NGLL3_PADDED,&h_gammay[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1508);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_gammaz_crust_mantle+i*NGLL3_PADDED,&h_gammaz[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1509);
+ // muvstore
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_muvstore_crust_mantle+i*NGLL3_PADDED, &h_muv[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1511);
+ }
+
+ // global indexing
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibool_crust_mantle, size_padded*sizeof(int)),1021);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ibool_crust_mantle, h_ibool,
+ NGLL3*(mp->NSPEC_CRUST_MANTLE)*sizeof(int),cudaMemcpyHostToDevice),1022);
+
+ // transverse isotropic elements
+ // only needed if not anisotropic 3D mantle
+ if( ! mp->anisotropic_3D_mantle ){
+ // no anisotropy
+
+ // transverse isotropy flag
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ispec_is_tiso_crust_mantle, (mp->NSPEC_CRUST_MANTLE)*sizeof(int)),1025);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_tiso_crust_mantle, h_ispec_is_tiso,
+ (mp->NSPEC_CRUST_MANTLE)*sizeof(int),cudaMemcpyHostToDevice),1025);
+
+ // kappavstore, kappahstore
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_kappavstore_crust_mantle, size_padded*sizeof(realw)),1010);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_kappahstore_crust_mantle, size_padded*sizeof(realw)),1010);
+ // muhstore
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_muhstore_crust_mantle, size_padded*sizeof(realw)),1010);
+ // eta_anisostore
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_eta_anisostore_crust_mantle, size_padded*sizeof(realw)),1010);
+
+ // transfer with padding
+ for(int i=0;i < mp->NSPEC_CRUST_MANTLE;i++) {
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_kappavstore_crust_mantle+i*NGLL3_PADDED,&h_kappav[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1510);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_kappahstore_crust_mantle+i*NGLL3_PADDED,&h_kappah[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1510);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_muhstore_crust_mantle+i*NGLL3_PADDED,&h_muh[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1510);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_eta_anisostore_crust_mantle+i*NGLL3_PADDED,&h_eta_aniso[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1510);
+ }
+ }else{
+ // anisotropic 3D mantle
+
+ // allocates memory on GPU
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c11store_crust_mantle),
+ size_padded*sizeof(realw)),4700);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c12store_crust_mantle),
+ size_padded*sizeof(realw)),4701);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c13store_crust_mantle),
+ size_padded*sizeof(realw)),4702);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c14store_crust_mantle),
+ size_padded*sizeof(realw)),4703);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c15store_crust_mantle),
+ size_padded*sizeof(realw)),4704);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c16store_crust_mantle),
+ size_padded*sizeof(realw)),4705);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c22store_crust_mantle),
+ size_padded*sizeof(realw)),4706);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c23store_crust_mantle),
+ size_padded*sizeof(realw)),4707);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c24store_crust_mantle),
+ size_padded*sizeof(realw)),4708);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c25store_crust_mantle),
+ size_padded*sizeof(realw)),4709);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c26store_crust_mantle),
+ size_padded*sizeof(realw)),4710);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c33store_crust_mantle),
+ size_padded*sizeof(realw)),4711);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c34store_crust_mantle),
+ size_padded*sizeof(realw)),4712);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c35store_crust_mantle),
+ size_padded*sizeof(realw)),4713);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c36store_crust_mantle),
+ size_padded*sizeof(realw)),4714);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c44store_crust_mantle),
+ size_padded*sizeof(realw)),4715);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c45store_crust_mantle),
+ size_padded*sizeof(realw)),4716);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c46store_crust_mantle),
+ size_padded*sizeof(realw)),4717);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c55store_crust_mantle),
+ size_padded*sizeof(realw)),4718);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c56store_crust_mantle),
+ size_padded*sizeof(realw)),4719);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c66store_crust_mantle),
+ size_padded*sizeof(realw)),4720);
+
+ // transfer constant element data with padding
+ for(int i=0;i < mp->NSPEC_CRUST_MANTLE;i++) {
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c11store_crust_mantle + i*NGLL3_PADDED, &c11store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4800);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c12store_crust_mantle + i*NGLL3_PADDED, &c12store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4801);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c13store_crust_mantle + i*NGLL3_PADDED, &c13store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4802);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c14store_crust_mantle + i*NGLL3_PADDED, &c14store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4803);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c15store_crust_mantle + i*NGLL3_PADDED, &c15store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4804);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c16store_crust_mantle + i*NGLL3_PADDED, &c16store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4805);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c22store_crust_mantle + i*NGLL3_PADDED, &c22store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4806);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c23store_crust_mantle + i*NGLL3_PADDED, &c23store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4807);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c24store_crust_mantle + i*NGLL3_PADDED, &c24store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4808);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c25store_crust_mantle + i*NGLL3_PADDED, &c25store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4809);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c26store_crust_mantle + i*NGLL3_PADDED, &c26store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4810);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c33store_crust_mantle + i*NGLL3_PADDED, &c33store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4811);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c34store_crust_mantle + i*NGLL3_PADDED, &c34store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4812);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c35store_crust_mantle + i*NGLL3_PADDED, &c35store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4813);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c36store_crust_mantle + i*NGLL3_PADDED, &c36store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4814);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c44store_crust_mantle + i*NGLL3_PADDED, &c44store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4815);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c45store_crust_mantle + i*NGLL3_PADDED, &c45store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4816);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c46store_crust_mantle + i*NGLL3_PADDED, &c46store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4817);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c55store_crust_mantle + i*NGLL3_PADDED, &c55store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4818);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c56store_crust_mantle + i*NGLL3_PADDED, &c56store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4819);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c66store_crust_mantle + i*NGLL3_PADDED, &c66store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4820);
+ }
+ }
+
+ // needed for boundary kernel calculations
+ if( mp->simulation_type == 3 && mp->save_boundary_mesh ){
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_rhostore_crust_mantle, size_padded*sizeof(realw)),1010);
+ for(int i=0;i < mp->NSPEC_CRUST_MANTLE;i++) {
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_rhostore_crust_mantle+i*NGLL3_PADDED, &h_rho[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),2106);
+ }
+ }
+
+ // mesh locations
+ // ystore & zstore needed for tiso elements
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ystore_crust_mantle),sizeof(realw)*size_glob),2005);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ystore_crust_mantle,h_ystore,
+ sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_zstore_crust_mantle),sizeof(realw)*size_glob),2005);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_zstore_crust_mantle,h_zstore,
+ sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
+
+ // xstore only needed when gravity is on
+ if( mp->gravity ){
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_xstore_crust_mantle),sizeof(realw)*size_glob),2005);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_xstore_crust_mantle,h_xstore,
+ sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
+ }
+
+ // inner/outer elements
+ mp->num_phase_ispec_crust_mantle = *num_phase_ispec;
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_phase_ispec_inner_crust_mantle),
+ mp->num_phase_ispec_crust_mantle*2*sizeof(int)),2008);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_phase_ispec_inner_crust_mantle,phase_ispec_inner,
+ mp->num_phase_ispec_crust_mantle*2*sizeof(int),cudaMemcpyHostToDevice),2101);
+
+ mp->nspec_outer_crust_mantle = *nspec_outer;
+ mp->nspec_inner_crust_mantle = *nspec_inner;
+
+ // wavefield
+ int size = NDIM * mp->NGLOB_CRUST_MANTLE;
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_displ_crust_mantle),sizeof(realw)*size),4001);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_veloc_crust_mantle),sizeof(realw)*size),4002);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_accel_crust_mantle),sizeof(realw)*size),4003);
+ // backward/reconstructed wavefield
+ if( mp->simulation_type == 3 ){
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_displ_crust_mantle),sizeof(realw)*size),4001);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_veloc_crust_mantle),sizeof(realw)*size),4002);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_accel_crust_mantle),sizeof(realw)*size),4003);
+ }
+
+ // mass matrix
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass_crust_mantle),sizeof(realw)*size_glob),2005);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass_crust_mantle,h_rmass,
+ sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
+
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("prepare_crust_mantle_device");
+#endif
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// OUTER CORE
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Prepares OUTER CORE mesh data on the GPU device.
+// Allocates device arrays and copies the host-side arrays (h_* pointers) over:
+// Jacobian partial derivatives (xix..gammaz), kappav material values, global
+// ibool indexing, mesh point coordinates, inner/outer phase element lists,
+// the outer-core wavefields and the mass matrix.
+// NOTE(review): the FC_FUNC_ macro wrapping suggests this entry point is
+// called from Fortran — confirm against the calling routine.
+extern "C"
+void FC_FUNC_(prepare_outer_core_device,
+ PREPARE_OUTER_CORE_DEVICE)(long* Mesh_pointer_f,
+ realw* h_xix, realw* h_xiy, realw* h_xiz,
+ realw* h_etax, realw* h_etay, realw* h_etaz,
+ realw* h_gammax, realw* h_gammay, realw* h_gammaz,
+ realw* h_rho, realw* h_kappav,
+ realw* h_rmass,
+ int* h_ibool,
+ realw* h_xstore, realw* h_ystore, realw* h_zstore,
+ int* num_phase_ispec,
+ int* phase_ispec_inner,
+ int* nspec_outer,
+ int* nspec_inner
+ ) {
+
+ TRACE("prepare_outer_core_device");
+
+ // Mesh_pointer_f carries the address of the Mesh container as a Fortran integer
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+
+ /* Assuming NGLLX=5. Padded is then 128 (5^3+3) */
+ int size_padded = NGLL3_PADDED * (mp->NSPEC_OUTER_CORE);
+ int size_glob = mp->NGLOB_OUTER_CORE;
+
+ // mesh
+ // padded allocations: each element occupies an NGLL3_PADDED stride on device
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xix_outer_core, size_padded*sizeof(realw)),1001);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xiy_outer_core, size_padded*sizeof(realw)),1002);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xiz_outer_core, size_padded*sizeof(realw)),1003);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etax_outer_core, size_padded*sizeof(realw)),1004);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etay_outer_core, size_padded*sizeof(realw)),1005);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etaz_outer_core, size_padded*sizeof(realw)),1006);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammax_outer_core, size_padded*sizeof(realw)),1007);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammay_outer_core, size_padded*sizeof(realw)),1008);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammaz_outer_core, size_padded*sizeof(realw)),1009);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_kappavstore_outer_core, size_padded*sizeof(realw)),1010);
+
+ // transfer constant element data with padding
+ // one cudaMemcpy of NGLL3 values per element, written at an NGLL3_PADDED
+ // stride; the trailing pad entries of each element chunk stay uninitialized
+ for(int i=0;i < mp->NSPEC_OUTER_CORE;i++) {
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_xix_outer_core + i*NGLL3_PADDED, &h_xix[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1501);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_xiy_outer_core+i*NGLL3_PADDED, &h_xiy[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1502);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_xiz_outer_core+i*NGLL3_PADDED, &h_xiz[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1503);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_etax_outer_core+i*NGLL3_PADDED, &h_etax[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1504);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_etay_outer_core+i*NGLL3_PADDED, &h_etay[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1505);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_etaz_outer_core+i*NGLL3_PADDED, &h_etaz[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1506);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_gammax_outer_core+i*NGLL3_PADDED,&h_gammax[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1507);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_gammay_outer_core+i*NGLL3_PADDED,&h_gammay[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1508);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_gammaz_outer_core+i*NGLL3_PADDED,&h_gammaz[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1509);
+
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_kappavstore_outer_core+i*NGLL3_PADDED,&h_kappav[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1510);
+ }
+
+ // needed for kernel calculations
+ // density only required by adjoint/kernel simulations (simulation_type == 3)
+ if( mp->simulation_type == 3 ){
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_rhostore_outer_core, size_padded*sizeof(realw)),1010);
+ for(int i=0;i < mp->NSPEC_OUTER_CORE;i++) {
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_rhostore_outer_core+i*NGLL3_PADDED, &h_rho[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),2106);
+ }
+ }
+
+ // global indexing
+ // note: allocation is padded-sized but the copy is a single contiguous
+ // NGLL3*NSPEC transfer, i.e. d_ibool is stored non-padded
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibool_outer_core, size_padded*sizeof(int)),1021);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ibool_outer_core, h_ibool,
+ NGLL3*(mp->NSPEC_OUTER_CORE)*sizeof(int),cudaMemcpyHostToDevice),1022);
+
+ // mesh locations
+ // always needed
+ // NOTE(review): error codes 2005/2100 are reused for several distinct
+ // allocations/copies below, which makes failures harder to pinpoint
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_xstore_outer_core),sizeof(realw)*size_glob),2005);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_xstore_outer_core,h_xstore,
+ sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ystore_outer_core),sizeof(realw)*size_glob),2005);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ystore_outer_core,h_ystore,
+ sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_zstore_outer_core),sizeof(realw)*size_glob),2005);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_zstore_outer_core,h_zstore,
+ sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
+
+ // inner/outer elements
+ // phase_ispec_inner is a (num_phase_ispec,2) list of element indices
+ mp->num_phase_ispec_outer_core = *num_phase_ispec;
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_phase_ispec_inner_outer_core),
+ mp->num_phase_ispec_outer_core*2*sizeof(int)),2008);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_phase_ispec_inner_outer_core,phase_ispec_inner,
+ mp->num_phase_ispec_outer_core*2*sizeof(int),cudaMemcpyHostToDevice),2101);
+
+ mp->nspec_outer_outer_core = *nspec_outer;
+ mp->nspec_inner_outer_core = *nspec_inner;
+
+ // wavefield
+ // scalar (fluid) field: size is NGLOB without the NDIM factor used for
+ // the elastic crust/mantle and inner-core regions
+ int size = mp->NGLOB_OUTER_CORE;
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_displ_outer_core),sizeof(realw)*size),4001);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_veloc_outer_core),sizeof(realw)*size),4002);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_accel_outer_core),sizeof(realw)*size),4003);
+ // backward/reconstructed wavefield
+ if( mp->simulation_type == 3 ){
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_displ_outer_core),sizeof(realw)*size),4001);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_veloc_outer_core),sizeof(realw)*size),4002);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_accel_outer_core),sizeof(realw)*size),4003);
+ }
+
+ // mass matrix
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass_outer_core),sizeof(realw)*size_glob),2005);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass_outer_core,h_rmass,
+ sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
+
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("prepare_outer_core_device");
+#endif
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// INNER CORE
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Prepares INNER CORE mesh data on the GPU device.
+// Allocates device arrays and copies the host-side arrays over: Jacobian
+// partial derivatives, muv (plus kappav or the c11..c44 anisotropy stores,
+// depending on mp->anisotropic_inner_core), ibool indexing, idoubling flags,
+// gravity mesh coordinates, phase element lists, wavefields and mass matrix.
+// NOTE(review): the FC_FUNC_ macro wrapping suggests this entry point is
+// called from Fortran — confirm against the calling routine.
+extern "C"
+void FC_FUNC_(prepare_inner_core_device,
+ PREPARE_INNER_CORE_DEVICE)(long* Mesh_pointer_f,
+ realw* h_xix, realw* h_xiy, realw* h_xiz,
+ realw* h_etax, realw* h_etay, realw* h_etaz,
+ realw* h_gammax, realw* h_gammay, realw* h_gammaz,
+ realw* h_rho, realw* h_kappav, realw* h_muv,
+ realw* h_rmass,
+ int* h_ibool,
+ realw* h_xstore, realw* h_ystore, realw* h_zstore,
+ realw *c11store,realw *c12store,realw *c13store,
+ realw *c33store,realw *c44store,
+ int* h_idoubling_inner_core,
+ int* num_phase_ispec,
+ int* phase_ispec_inner,
+ int* nspec_outer,
+ int* nspec_inner
+ //int* iboolleft_xi, int* iboolright_xi,
+ //int* iboolleft_eta, int* iboolright_eta,
+ //int* npoin2D_xi, int* npoin2D_eta
+ ) {
+
+ TRACE("prepare_inner_core_device");
+
+ // Mesh_pointer_f carries the address of the Mesh container as a Fortran integer
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+
+ /* Assuming NGLLX=5. Padded is then 128 (5^3+3) */
+ int size_padded = NGLL3_PADDED * (mp->NSPEC_INNER_CORE);
+ int size_glob = mp->NGLOB_INNER_CORE;
+
+ // mesh
+ // padded allocations: each element occupies an NGLL3_PADDED stride on device
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xix_inner_core, size_padded*sizeof(realw)),1001);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xiy_inner_core, size_padded*sizeof(realw)),1002);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xiz_inner_core, size_padded*sizeof(realw)),1003);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etax_inner_core, size_padded*sizeof(realw)),1004);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etay_inner_core, size_padded*sizeof(realw)),1005);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etaz_inner_core, size_padded*sizeof(realw)),1006);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammax_inner_core, size_padded*sizeof(realw)),1007);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammay_inner_core, size_padded*sizeof(realw)),1008);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammaz_inner_core, size_padded*sizeof(realw)),1009);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_muvstore_inner_core, size_padded*sizeof(realw)),1011);
+
+ // transfer constant element data with padding
+ // one cudaMemcpy of NGLL3 values per element, written at an NGLL3_PADDED
+ // stride; the trailing pad entries of each element chunk stay uninitialized
+ for(int i=0;i < mp->NSPEC_INNER_CORE;i++) {
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_xix_inner_core + i*NGLL3_PADDED, &h_xix[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1501);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_xiy_inner_core+i*NGLL3_PADDED, &h_xiy[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1502);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_xiz_inner_core+i*NGLL3_PADDED, &h_xiz[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1503);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_etax_inner_core+i*NGLL3_PADDED, &h_etax[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1504);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_etay_inner_core+i*NGLL3_PADDED, &h_etay[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1505);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_etaz_inner_core+i*NGLL3_PADDED, &h_etaz[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1506);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_gammax_inner_core+i*NGLL3_PADDED,&h_gammax[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1507);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_gammay_inner_core+i*NGLL3_PADDED,&h_gammay[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1508);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_gammaz_inner_core+i*NGLL3_PADDED,&h_gammaz[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1509);
+
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_muvstore_inner_core+i*NGLL3_PADDED, &h_muv[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1511);
+ }
+
+ // anisotropy
+ if( ! mp->anisotropic_inner_core ){
+ // no anisotropy (uses kappav and muv in inner core)
+ // kappavstore needed
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_kappavstore_inner_core, size_padded*sizeof(realw)),1010);
+ for(int i=0;i < mp->NSPEC_INNER_CORE;i++) {
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_kappavstore_inner_core+i*NGLL3_PADDED,&h_kappav[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),1510);
+ }
+ }else{
+ // anisotropic inner core
+ // only c11,c12,c13,c33,c44 are transferred here (fewer constants than
+ // the full 21-component crust/mantle anisotropy)
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c11store_inner_core),
+ size_padded*sizeof(realw)),4700);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c12store_inner_core),
+ size_padded*sizeof(realw)),4701);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c13store_inner_core),
+ size_padded*sizeof(realw)),4702);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c33store_inner_core),
+ size_padded*sizeof(realw)),4703);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c44store_inner_core),
+ size_padded*sizeof(realw)),4704);
+
+ // transfer constant element data with padding
+ for(int i=0;i < mp->NSPEC_INNER_CORE;i++) {
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c11store_inner_core + i*NGLL3_PADDED, &c11store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4800);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c12store_inner_core + i*NGLL3_PADDED, &c12store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4801);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c13store_inner_core + i*NGLL3_PADDED, &c13store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4802);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c33store_inner_core + i*NGLL3_PADDED, &c33store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4803);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c44store_inner_core + i*NGLL3_PADDED, &c44store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4804);
+ }
+ }
+
+ // needed for boundary kernel calculations
+ // density only for adjoint runs that also save the boundary mesh
+ if( mp->simulation_type == 3 && mp->save_boundary_mesh ){
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_rhostore_inner_core, size_padded*sizeof(realw)),1010);
+ for(int i=0;i < mp->NSPEC_INNER_CORE;i++) {
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_rhostore_inner_core+i*NGLL3_PADDED, &h_rho[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),2106);
+ }
+ }
+
+ // global indexing
+ // note: allocation is padded-sized but the copy is a single contiguous
+ // NGLL3*NSPEC transfer, i.e. d_ibool is stored non-padded
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibool_inner_core, size_padded*sizeof(int)),1021);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ibool_inner_core, h_ibool,
+ NGLL3*(mp->NSPEC_INNER_CORE)*sizeof(int),cudaMemcpyHostToDevice),1022);
+
+ // fictitious element flags (idoubling markers, one int per element)
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_idoubling_inner_core,
+ mp->NSPEC_INNER_CORE*sizeof(int)),2010);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_idoubling_inner_core, h_idoubling_inner_core,
+ mp->NSPEC_INNER_CORE*sizeof(int),cudaMemcpyHostToDevice),2011);
+
+ // mesh locations
+ // only needed when gravity is on
+ // NOTE(review): error codes 2005/2100 are reused for several distinct
+ // allocations/copies below, which makes failures harder to pinpoint
+ if( mp->gravity ){
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_xstore_inner_core),sizeof(realw)*size_glob),2005);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_xstore_inner_core,h_xstore,
+ sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ystore_inner_core),sizeof(realw)*size_glob),2005);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ystore_inner_core,h_ystore,
+ sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_zstore_inner_core),sizeof(realw)*size_glob),2005);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_zstore_inner_core,h_zstore,
+ sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
+ }
+
+ // inner/outer elements
+ // phase_ispec_inner is a (num_phase_ispec,2) list of element indices
+ mp->num_phase_ispec_inner_core = *num_phase_ispec;
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_phase_ispec_inner_inner_core),
+ mp->num_phase_ispec_inner_core*2*sizeof(int)),2008);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_phase_ispec_inner_inner_core,phase_ispec_inner,
+ mp->num_phase_ispec_inner_core*2*sizeof(int),cudaMemcpyHostToDevice),2101);
+
+ mp->nspec_outer_inner_core = *nspec_outer;
+ mp->nspec_inner_inner_core = *nspec_inner;
+
+
+ // wavefield
+ // vector (elastic) field: NDIM components per global point
+ int size = NDIM * mp->NGLOB_INNER_CORE;
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_displ_inner_core),sizeof(realw)*size),4001);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_veloc_inner_core),sizeof(realw)*size),4002);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_accel_inner_core),sizeof(realw)*size),4003);
+ // backward/reconstructed wavefield
+ if( mp->simulation_type == 3 ){
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_displ_inner_core),sizeof(realw)*size),4001);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_veloc_inner_core),sizeof(realw)*size),4002);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_accel_inner_core),sizeof(realw)*size),4003);
+ }
+
+ // mass matrix
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass_inner_core),sizeof(realw)*size_glob),2005);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass_inner_core,h_rmass,
+ sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
+
+
+ // mpi communication
+// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_iboolright_xi_inner_core,npoin2D_xi*sizeof(int)),270);
+// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_iboolleft_xi_inner_core,npoin2D_xi*sizeof(int)),280);
+// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_iboolright_eta_inner_core,npoin2D_eta*sizeof(int)),290);
+// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_iboolleft_eta_inner_core,npoin2D_eta*sizeof(int)),300);
+//
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_iboolright_xi_inner_core,iboolright_xi,
+// npoin2D_xi*sizeof(int),cudaMemcpyHostToDevice),91);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_iboolleft_xi_inner_core,iboolleft_xi,
+// npoin2D_xi*sizeof(int),cudaMemcpyHostToDevice),92);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_iboolright_eta_inner_core,iboolright_eta,
+// npoin2D_eta*sizeof(int),cudaMemcpyHostToDevice),93);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_iboolleft_eta_inner_core,iboolleft_eta,
+// npoin2D_eta*sizeof(int),cudaMemcpyHostToDevice),94);
+
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("prepare_inner_core_device");
+#endif
+}
+
+
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// for ACOUSTIC simulations
+
+/* ----------------------------------------------------------------------------------------------- */
+/*
+extern "C"
+void FC_FUNC_(prepare_fields_acoustic_device,
+ PREPARE_FIELDS_ACOUSTIC_DEVICE)(long* Mesh_pointer_f,
+ realw* rmass_acoustic,
+ realw* rhostore,
+ realw* kappastore,
+ int* num_phase_ispec_acoustic,
+ int* phase_ispec_inner_acoustic,
+ int* ispec_is_acoustic,
+ int* NOISE_TOMOGRAPHY,
+ int* num_free_surface_faces,
+ int* free_surface_ispec,
+ int* free_surface_ijk,
+ int* ABSORBING_CONDITIONS,
+ int* b_reclen_potential,
+ realw* b_absorb_potential,
+ int* ELASTIC_SIMULATION,
+ int* num_coupling_ac_el_faces,
+ int* coupling_ac_el_ispec,
+ int* coupling_ac_el_ijk,
+ realw* coupling_ac_el_normal,
+ realw* coupling_ac_el_jacobian2Dw,
+ int* num_colors_outer_acoustic,
+ int* num_colors_inner_acoustic,
+ int* num_elem_colors_acoustic) {
+
+ TRACE("prepare_fields_acoustic_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+ // Assuming NGLLX==5. Padded is then 128 (5^3+3)
+ int size_padded = NGLL3_PADDED * mp->NSPEC_AB;
+ int size_nonpadded = NGLL3 * mp->NSPEC_AB;
+ int size_glob = mp->NGLOB_AB;
+
+ // allocates arrays on device (GPU)
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_potential_acoustic),sizeof(realw)*size_glob),2001);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_potential_dot_acoustic),sizeof(realw)*size_glob),2002);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_potential_dot_dot_acoustic),sizeof(realw)*size_glob),2003);
+
+ // mpi buffer
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_send_potential_dot_dot_buffer),
+ (mp->max_nibool_interfaces_ext_mesh)*(mp->num_interfaces_ext_mesh)*sizeof(realw)),2004);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass_acoustic),sizeof(realw)*size_glob),2005);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass_acoustic,rmass_acoustic,
+ sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
+
+ // padded array
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rhostore),size_padded*sizeof(realw)),2006);
+ // transfer constant element data with padding
+ for(int i=0; i < mp->NSPEC_AB; i++) {
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_rhostore+i*NGLL3_PADDED, &rhostore[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),2106);
+ }
+
+ // non-padded array
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_kappastore),size_nonpadded*sizeof(realw)),2007);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_kappastore,kappastore,
+ NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),2105);
+
+ // phase elements
+ mp->num_phase_ispec_acoustic = *num_phase_ispec_acoustic;
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_phase_ispec_inner_acoustic),
+ mp->num_phase_ispec_acoustic*2*sizeof(int)),2008);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_phase_ispec_inner_acoustic,phase_ispec_inner_acoustic,
+ mp->num_phase_ispec_acoustic*2*sizeof(int),cudaMemcpyHostToDevice),2101);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ispec_is_acoustic),
+ mp->NSPEC_AB*sizeof(int)),2009);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_acoustic,ispec_is_acoustic,
+ mp->NSPEC_AB*sizeof(int),cudaMemcpyHostToDevice),2102);
+
+ // free surface
+ if( *NOISE_TOMOGRAPHY == 0 ){
+ // allocate surface arrays
+ mp->num_free_surface_faces = *num_free_surface_faces;
+ if( mp->num_free_surface_faces > 0 ){
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ispec),
+ mp->num_free_surface_faces*sizeof(int)),2201);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ispec,free_surface_ispec,
+ mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),2203);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ijk),
+ 3*NGLL2*mp->num_free_surface_faces*sizeof(int)),2202);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ijk,free_surface_ijk,
+ 3*NGLL2*mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),2204);
+ }
+ }
+
+ // absorbing boundaries
+ if( *ABSORBING_CONDITIONS ){
+ mp->d_b_reclen_potential = *b_reclen_potential;
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_absorb_potential),mp->d_b_reclen_potential),2301);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_potential,b_absorb_potential,
+ mp->d_b_reclen_potential,cudaMemcpyHostToDevice),2302);
+ }
+
+
+ // for seismograms
+ if( mp->nrec_local > 0 ){
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_station_seismo_potential),
+ mp->nrec_local*NGLL3*sizeof(realw)),2107);
+
+ mp->h_station_seismo_potential = (realw*) malloc( mp->nrec_local*NGLL3*sizeof(realw) );
+ if( mp->h_station_seismo_potential == NULL) exit_on_error("error allocating h_station_seismo_potential");
+ }
+
+
+ // coupling with elastic parts
+ if( *ELASTIC_SIMULATION && *num_coupling_ac_el_faces > 0 ){
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_ispec),
+ (*num_coupling_ac_el_faces)*sizeof(int)),2601);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_ispec,coupling_ac_el_ispec,
+ (*num_coupling_ac_el_faces)*sizeof(int),cudaMemcpyHostToDevice),2602);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_ijk),
+ 3*NGLL2*(*num_coupling_ac_el_faces)*sizeof(int)),2603);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_ijk,coupling_ac_el_ijk,
+ 3*NGLL2*(*num_coupling_ac_el_faces)*sizeof(int),cudaMemcpyHostToDevice),2604);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_normal),
+ 3*NGLL2*(*num_coupling_ac_el_faces)*sizeof(realw)),2605);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_normal,coupling_ac_el_normal,
+ 3*NGLL2*(*num_coupling_ac_el_faces)*sizeof(realw),cudaMemcpyHostToDevice),2606);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_jacobian2Dw),
+ NGLL2*(*num_coupling_ac_el_faces)*sizeof(realw)),2607);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_jacobian2Dw,coupling_ac_el_jacobian2Dw,
+ NGLL2*(*num_coupling_ac_el_faces)*sizeof(realw),cudaMemcpyHostToDevice),2608);
+
+ }
+
+ // mesh coloring
+ if( mp->use_mesh_coloring_gpu ){
+ mp->num_colors_outer_acoustic = *num_colors_outer_acoustic;
+ mp->num_colors_inner_acoustic = *num_colors_inner_acoustic;
+ mp->h_num_elem_colors_acoustic = (int*) num_elem_colors_acoustic;
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("prepare_fields_acoustic_device");
+#endif
+}
+*/
+
+/* ----------------------------------------------------------------------------------------------- */
+
+/*
+extern "C"
+void FC_FUNC_(prepare_fields_acoustic_adj_dev,
+ PREPARE_FIELDS_ACOUSTIC_ADJ_DEV)(long* Mesh_pointer_f,
+ int* SIMULATION_TYPE,
+ int* APPROXIMATE_HESS_KL) {
+
+ TRACE("prepare_fields_acoustic_adj_dev");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+
+ int size_glob = mp->NGLOB_AB;
+
+ // kernel simulations
+ if( *SIMULATION_TYPE != 3 ) return;
+
+ // allocates backward/reconstructed arrays on device (GPU)
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_potential_acoustic),sizeof(realw)*size_glob),3014);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_potential_dot_acoustic),sizeof(realw)*size_glob),3015);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_potential_dot_dot_acoustic),sizeof(realw)*size_glob),3016);
+
+ // allocates kernels
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_ac_kl),NGLL3*mp->NSPEC_AB*sizeof(realw)),3017);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_kappa_ac_kl),NGLL3*mp->NSPEC_AB*sizeof(realw)),3018);
+
+ // initializes kernel values to zero
+ print_CUDA_error_if_any(cudaMemset(mp->d_rho_ac_kl,0,
+ NGLL3*mp->NSPEC_AB*sizeof(realw)),3019);
+ print_CUDA_error_if_any(cudaMemset(mp->d_kappa_ac_kl,0,
+ NGLL3*mp->NSPEC_AB*sizeof(realw)),3020);
+
+ // preconditioner
+ if( *APPROXIMATE_HESS_KL ){
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_hess_ac_kl),NGLL3*mp->NSPEC_AB*sizeof(realw)),3030);
+ // initializes with zeros
+ print_CUDA_error_if_any(cudaMemset(mp->d_hess_ac_kl,0,
+ NGLL3*mp->NSPEC_AB*sizeof(realw)),3031);
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("prepare_fields_acoustic_adj_dev");
+#endif
+}
+*/
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// for ELASTIC simulations
+
+/* ----------------------------------------------------------------------------------------------- */
+
+/*
+extern "C"
+void FC_FUNC_(prepare_fields_elastic_device,
+ PREPARE_FIELDS_ELASTIC_DEVICE)(long* Mesh_pointer_f,
+ int* size,
+ realw* rmass,
+ realw* rho_vp,
+ realw* rho_vs,
+ int* num_phase_ispec_elastic,
+ int* phase_ispec_inner_elastic,
+ int* ispec_is_elastic,
+ int* ABSORBING_CONDITIONS,
+ realw* h_b_absorb_field,
+ int* h_b_reclen_field,
+ int* SIMULATION_TYPE,int* SAVE_FORWARD,
+ int* COMPUTE_AND_STORE_STRAIN,
+ realw* epsilondev_xx,realw* epsilondev_yy,realw* epsilondev_xy,
+ realw* epsilondev_xz,realw* epsilondev_yz,
+ int* ATTENUATION,
+ int* R_size,
+ realw* R_xx,realw* R_yy,realw* R_xy,realw* R_xz,realw* R_yz,
+ realw* one_minus_sum_beta,realw* factor_common,
+ realw* alphaval,realw* betaval,realw* gammaval,
+ int* OCEANS,
+ realw* rmass_ocean_load,
+ int* NOISE_TOMOGRAPHY,
+ realw* free_surface_normal,
+ int* free_surface_ispec,
+ int* free_surface_ijk,
+ int* num_free_surface_faces,
+ int* ACOUSTIC_SIMULATION,
+ int* num_colors_outer_elastic,
+ int* num_colors_inner_elastic,
+ int* num_elem_colors_elastic,
+ int* ANISOTROPY,
+ realw *c11store,
+ realw *c12store,
+ realw *c13store,
+ realw *c14store,
+ realw *c15store,
+ realw *c16store,
+ realw *c22store,
+ realw *c23store,
+ realw *c24store,
+ realw *c25store,
+ realw *c26store,
+ realw *c33store,
+ realw *c34store,
+ realw *c35store,
+ realw *c36store,
+ realw *c44store,
+ realw *c45store,
+ realw *c46store,
+ realw *c55store,
+ realw *c56store,
+ realw *c66store){
+
+TRACE("prepare_fields_elastic_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+ // Assuming NGLLX==5. Padded is then 128 (5^3+3)
+ int size_padded = NGLL3_PADDED * (mp->NSPEC_AB);
+ int size_nonpadded = NGLL3 * (mp->NSPEC_AB);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_displ),sizeof(realw)*(*size)),4001);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_veloc),sizeof(realw)*(*size)),4002);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_accel),sizeof(realw)*(*size)),4003);
+
+ // mpi buffer
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_send_accel_buffer),
+ 3*(mp->max_nibool_interfaces_ext_mesh)*(mp->num_interfaces_ext_mesh)*sizeof(realw)),4004);
+
+ // mass matrix
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass),sizeof(realw)*mp->NGLOB_AB),4005);
+ // transfer element data
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass,rmass,
+ sizeof(realw)*mp->NGLOB_AB,cudaMemcpyHostToDevice),4010);
+
+
+ // element indices
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ispec_is_elastic),mp->NSPEC_AB*sizeof(int)),4009);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_elastic,ispec_is_elastic,
+ mp->NSPEC_AB*sizeof(int),cudaMemcpyHostToDevice),4012);
+
+ // phase elements
+ mp->num_phase_ispec_elastic = *num_phase_ispec_elastic;
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_phase_ispec_inner_elastic),
+ mp->num_phase_ispec_elastic*2*sizeof(int)),4008);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_phase_ispec_inner_elastic,phase_ispec_inner_elastic,
+ mp->num_phase_ispec_elastic*2*sizeof(int),cudaMemcpyHostToDevice),4011);
+
+ // for seismograms
+ if( mp->nrec_local > 0 ){
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_station_seismo_field),
+ 3*NGLL3*(mp->nrec_local)*sizeof(realw)),4015);
+
+ mp->h_station_seismo_field = (realw*) malloc( 3*NGLL3*(mp->nrec_local)*sizeof(realw) );
+ if( mp->h_station_seismo_field == NULL) exit_on_error("h_station_seismo_field not allocated \n");
+ }
+
+ // absorbing conditions
+ if( *ABSORBING_CONDITIONS && mp->d_num_abs_boundary_faces > 0){
+ // non-padded arrays
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_vp),size_nonpadded*sizeof(realw)),4006);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_vs),size_nonpadded*sizeof(realw)),4007);
+
+ // rho_vp, rho_vs non-padded; they are needed for stacey boundary condition
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_vp, rho_vp,
+ NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),4013);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_vs, rho_vs,
+ NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),4014);
+
+ // absorb_field array used for file i/o
+ if(*SIMULATION_TYPE == 3 || ( *SIMULATION_TYPE == 1 && *SAVE_FORWARD )){
+ mp->d_b_reclen_field = *h_b_reclen_field;
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_absorb_field),
+ mp->d_b_reclen_field),4016);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_field, h_b_absorb_field,
+ mp->d_b_reclen_field,cudaMemcpyHostToDevice),4017);
+ }
+ }
+
+ // strains used for attenuation and kernel simulations
+ if( *COMPUTE_AND_STORE_STRAIN ){
+ // strains
+ int epsilondev_size = NGLL3*mp->NSPEC_AB; // note: non-aligned; if align, check memcpy below and indexing
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_xx,
+ epsilondev_size*sizeof(realw)),4301);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_xx,epsilondev_xx,epsilondev_size*sizeof(realw),
+ cudaMemcpyHostToDevice),4302);
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_yy,
+ epsilondev_size*sizeof(realw)),4302);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_yy,epsilondev_yy,epsilondev_size*sizeof(realw),
+ cudaMemcpyHostToDevice),4303);
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_xy,
+ epsilondev_size*sizeof(realw)),4304);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_xy,epsilondev_xy,epsilondev_size*sizeof(realw),
+ cudaMemcpyHostToDevice),4305);
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_xz,
+ epsilondev_size*sizeof(realw)),4306);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_xz,epsilondev_xz,epsilondev_size*sizeof(realw),
+ cudaMemcpyHostToDevice),4307);
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_yz,
+ epsilondev_size*sizeof(realw)),4308);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilondev_yz,epsilondev_yz,epsilondev_size*sizeof(realw),
+ cudaMemcpyHostToDevice),4309);
+
+ }
+
+ // attenuation memory variables
+ if( *ATTENUATION ){
+ // memory arrays
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_R_xx),
+ (*R_size)*sizeof(realw)),4401);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_R_xx,R_xx,(*R_size)*sizeof(realw),
+ cudaMemcpyHostToDevice),4402);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_R_yy),
+ (*R_size)*sizeof(realw)),4403);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_R_yy,R_yy,(*R_size)*sizeof(realw),
+ cudaMemcpyHostToDevice),4404);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_R_xy),
+ (*R_size)*sizeof(realw)),4405);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_R_xy,R_xy,(*R_size)*sizeof(realw),
+ cudaMemcpyHostToDevice),4406);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_R_xz),
+ (*R_size)*sizeof(realw)),4407);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_R_xz,R_xz,(*R_size)*sizeof(realw),
+ cudaMemcpyHostToDevice),4408);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_R_yz),
+ (*R_size)*sizeof(realw)),4409);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_R_yz,R_yz,(*R_size)*sizeof(realw),
+ cudaMemcpyHostToDevice),4410);
+
+ // attenuation factors
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_one_minus_sum_beta),
+ NGLL3*mp->NSPEC_AB*sizeof(realw)),4430);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_one_minus_sum_beta ,one_minus_sum_beta,
+ NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),4431);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_factor_common),
+ N_SLS*NGLL3*mp->NSPEC_AB*sizeof(realw)),4432);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_factor_common ,factor_common,
+ N_SLS*NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),4433);
+
+ // alpha,beta,gamma factors
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_alphaval),
+ N_SLS*sizeof(realw)),4434);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_alphaval ,alphaval,
+ N_SLS*sizeof(realw),cudaMemcpyHostToDevice),4435);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_betaval),
+ N_SLS*sizeof(realw)),4436);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_betaval ,betaval,
+ N_SLS*sizeof(realw),cudaMemcpyHostToDevice),4437);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_gammaval),
+ N_SLS*sizeof(realw)),4438);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_gammaval ,gammaval,
+ N_SLS*sizeof(realw),cudaMemcpyHostToDevice),4439);
+
+ }
+
+ // anisotropy
+ if( *ANISOTROPY ){
+ // allocates memory on GPU
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c11store),
+ size_padded*sizeof(realw)),4700);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c12store),
+ size_padded*sizeof(realw)),4701);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c13store),
+ size_padded*sizeof(realw)),4702);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c14store),
+ size_padded*sizeof(realw)),4703);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c15store),
+ size_padded*sizeof(realw)),4704);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c16store),
+ size_padded*sizeof(realw)),4705);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c22store),
+ size_padded*sizeof(realw)),4706);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c23store),
+ size_padded*sizeof(realw)),4707);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c24store),
+ size_padded*sizeof(realw)),4708);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c25store),
+ size_padded*sizeof(realw)),4709);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c26store),
+ size_padded*sizeof(realw)),4710);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c33store),
+ size_padded*sizeof(realw)),4711);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c34store),
+ size_padded*sizeof(realw)),4712);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c35store),
+ size_padded*sizeof(realw)),4713);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c36store),
+ size_padded*sizeof(realw)),4714);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c44store),
+ size_padded*sizeof(realw)),4715);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c45store),
+ size_padded*sizeof(realw)),4716);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c46store),
+ size_padded*sizeof(realw)),4717);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c55store),
+ size_padded*sizeof(realw)),4718);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c56store),
+ size_padded*sizeof(realw)),4719);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_c66store),
+ size_padded*sizeof(realw)),4720);
+
+ // transfer constant element data with padding
+ for(int i=0;i < mp->NSPEC_AB;i++) {
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c11store + i*NGLL3_PADDED, &c11store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4800);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c12store + i*NGLL3_PADDED, &c12store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4801);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c13store + i*NGLL3_PADDED, &c13store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4802);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c14store + i*NGLL3_PADDED, &c14store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4803);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c15store + i*NGLL3_PADDED, &c15store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4804);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c16store + i*NGLL3_PADDED, &c16store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4805);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c22store + i*NGLL3_PADDED, &c22store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4806);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c23store + i*NGLL3_PADDED, &c23store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4807);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c24store + i*NGLL3_PADDED, &c24store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4808);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c25store + i*NGLL3_PADDED, &c25store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4809);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c26store + i*NGLL3_PADDED, &c26store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4810);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c33store + i*NGLL3_PADDED, &c33store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4811);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c34store + i*NGLL3_PADDED, &c34store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4812);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c35store + i*NGLL3_PADDED, &c35store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4813);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c36store + i*NGLL3_PADDED, &c36store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4814);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c44store + i*NGLL3_PADDED, &c44store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4815);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c45store + i*NGLL3_PADDED, &c45store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4816);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c46store + i*NGLL3_PADDED, &c46store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4817);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c55store + i*NGLL3_PADDED, &c55store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4818);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c56store + i*NGLL3_PADDED, &c56store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4819);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_c66store + i*NGLL3_PADDED, &c66store[i*NGLL3],
+ NGLL3*sizeof(realw),cudaMemcpyHostToDevice),4820);
+ }
+ }
+
+ // ocean load approximation
+ if( *OCEANS ){
+ // oceans needs a free surface
+ mp->num_free_surface_faces = *num_free_surface_faces;
+ if( mp->num_free_surface_faces > 0 ){
+ // mass matrix
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass_ocean_load),
+ sizeof(realw)*mp->NGLOB_AB),4501);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass_ocean_load,rmass_ocean_load,
+ sizeof(realw)*mp->NGLOB_AB,cudaMemcpyHostToDevice),4502);
+ // surface normal
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_normal),
+ 3*NGLL2*(mp->num_free_surface_faces)*sizeof(realw)),4503);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_normal,free_surface_normal,
+ 3*NGLL2*(mp->num_free_surface_faces)*sizeof(realw),cudaMemcpyHostToDevice),4504);
+
+ // temporary global array: used to synchronize updates on global accel array
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_updated_dof_ocean_load),
+ sizeof(int)*mp->NGLOB_AB),4505);
+
+ if( *NOISE_TOMOGRAPHY == 0 && *ACOUSTIC_SIMULATION == 0 ){
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ispec),
+ mp->num_free_surface_faces*sizeof(int)),4601);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ispec,free_surface_ispec,
+ mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),4603);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ijk),
+ 3*NGLL2*mp->num_free_surface_faces*sizeof(int)),4602);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ijk,free_surface_ijk,
+ 3*NGLL2*mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),4604);
+ }
+ }
+ }
+
+ // mesh coloring
+ if( mp->use_mesh_coloring_gpu ){
+ mp->num_colors_outer_elastic = *num_colors_outer_elastic;
+ mp->num_colors_inner_elastic = *num_colors_inner_elastic;
+ mp->h_num_elem_colors_elastic = (int*) num_elem_colors_elastic;
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("prepare_fields_elastic_device");
+#endif
+}
+*/
+
+/* ----------------------------------------------------------------------------------------------- */
+
+/*
+// Prepares elastic fields for adjoint/kernel simulations (SIMULATION_TYPE == 3):
+// allocates backward/reconstructed wavefields and elastic sensitivity kernels on
+// the GPU, zero-initializes the kernels, and copies backward strain fields and
+// attenuation memory variables from the host arrays passed in.
+//
+// Returns immediately for non-kernel simulations. All sizes are taken from the
+// scalar pointers supplied by the Fortran caller.
+extern "C"
+void FC_FUNC_(prepare_fields_elastic_adj_dev,
+              PREPARE_FIELDS_ELASTIC_ADJ_DEV)(long* Mesh_pointer_f,
+                                             int* size,
+                                             int* SIMULATION_TYPE,
+                                             int* COMPUTE_AND_STORE_STRAIN,
+                                             realw* epsilon_trace_over_3,
+                                             realw* b_epsilondev_xx,realw* b_epsilondev_yy,realw* b_epsilondev_xy,
+                                             realw* b_epsilondev_xz,realw* b_epsilondev_yz,
+                                             realw* b_epsilon_trace_over_3,
+                                             int* ATTENUATION,
+                                             int* R_size,
+                                             realw* b_R_xx,realw* b_R_yy,realw* b_R_xy,realw* b_R_xz,realw* b_R_yz,
+                                             realw* b_alphaval,realw* b_betaval,realw* b_gammaval,
+                                             int* APPROXIMATE_HESS_KL){
+
+  TRACE("prepare_fields_elastic_adj_dev");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+
+  // checks if kernel simulation; nothing to prepare for forward-only runs
+  if( *SIMULATION_TYPE != 3 ) return;
+
+  // kernel simulations
+  // allocates backward/reconstructed wavefields on device (GPU)
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_displ),sizeof(realw)*(*size)),5201);
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_veloc),sizeof(realw)*(*size)),5202);
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_accel),sizeof(realw)*(*size)),5203);
+
+  // allocates sensitivity kernels (rho, mu, kappa)
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_kl),NGLL3*mp->NSPEC_AB*sizeof(realw)),5204);
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_mu_kl),NGLL3*mp->NSPEC_AB*sizeof(realw)),5205);
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_kappa_kl),NGLL3*mp->NSPEC_AB*sizeof(realw)),5206);
+
+  // initializes kernel values to zero (kernels are accumulated during the time loop)
+  print_CUDA_error_if_any(cudaMemset(mp->d_rho_kl,0,
+                                     NGLL3*mp->NSPEC_AB*sizeof(realw)),5207);
+  print_CUDA_error_if_any(cudaMemset(mp->d_mu_kl,0,
+                                     NGLL3*mp->NSPEC_AB*sizeof(realw)),5208);
+  print_CUDA_error_if_any(cudaMemset(mp->d_kappa_kl,0,
+                                     NGLL3*mp->NSPEC_AB*sizeof(realw)),5209);
+
+  // strains used for attenuation and kernel simulations
+  if( *COMPUTE_AND_STORE_STRAIN ){
+    // strains
+    int epsilondev_size = NGLL3*mp->NSPEC_AB; // note: non-aligned; if align, check memcpy below and indexing
+
+    // solid pressure (trace of strain tensor / 3)
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_epsilon_trace_over_3),
+                                       NGLL3*mp->NSPEC_AB*sizeof(realw)),5310);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilon_trace_over_3,epsilon_trace_over_3,
+                                       NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),5311);
+    // backward solid pressure
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilon_trace_over_3),
+                                       NGLL3*mp->NSPEC_AB*sizeof(realw)),5312);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilon_trace_over_3 ,b_epsilon_trace_over_3,
+                                       NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),5313);
+    // prepares backward deviatoric strains
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xx),
+                                       epsilondev_size*sizeof(realw)),5321);
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_yy),
+                                       epsilondev_size*sizeof(realw)),5322);
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xy),
+                                       epsilondev_size*sizeof(realw)),5323);
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xz),
+                                       epsilondev_size*sizeof(realw)),5324);
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_yz),
+                                       epsilondev_size*sizeof(realw)),5325);
+
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xx,b_epsilondev_xx,
+                                       epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5326);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yy,b_epsilondev_yy,
+                                       epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5327);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xy,b_epsilondev_xy,
+                                       epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5328);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xz,b_epsilondev_xz,
+                                       epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5329);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yz,b_epsilondev_yz,
+                                       epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5330);
+  }
+
+  // attenuation memory variables for backward fields
+  if( *ATTENUATION ){
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xx),
+                                       (*R_size)*sizeof(realw)),5421);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xx,b_R_xx,(*R_size)*sizeof(realw),
+                                       cudaMemcpyHostToDevice),5422);
+
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_yy),
+                                       (*R_size)*sizeof(realw)),5423);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_yy,b_R_yy,(*R_size)*sizeof(realw),
+                                       cudaMemcpyHostToDevice),5424);
+
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xy),
+                                       (*R_size)*sizeof(realw)),5425);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xy,b_R_xy,(*R_size)*sizeof(realw),
+                                       cudaMemcpyHostToDevice),5426);
+
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xz),
+                                       (*R_size)*sizeof(realw)),5427);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xz,b_R_xz,(*R_size)*sizeof(realw),
+                                       cudaMemcpyHostToDevice),5428);
+
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_yz),
+                                       (*R_size)*sizeof(realw)),5429);
+    // bug fix: error tag was 5420, which duplicated the 542x range out of order;
+    // renumbered to 5430 so the reported tag uniquely identifies this memcpy
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_yz,b_R_yz,(*R_size)*sizeof(realw),
+                                       cudaMemcpyHostToDevice),5430);
+
+    // alpha,beta,gamma factors for backward fields
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_alphaval),
+                                       N_SLS*sizeof(realw)),5434);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_alphaval ,b_alphaval,
+                                       N_SLS*sizeof(realw),cudaMemcpyHostToDevice),5435);
+
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_betaval),
+                                       N_SLS*sizeof(realw)),5436);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_betaval ,b_betaval,
+                                       N_SLS*sizeof(realw),cudaMemcpyHostToDevice),5437);
+
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_gammaval),
+                                       N_SLS*sizeof(realw)),5438);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_b_gammaval ,b_gammaval,
+                                       N_SLS*sizeof(realw),cudaMemcpyHostToDevice),5439);
+  }
+
+  // approximate Hessian kernel (used for preconditioning), zero-initialized
+  if( *APPROXIMATE_HESS_KL ){
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_hess_el_kl),NGLL3*mp->NSPEC_AB*sizeof(realw)),5450);
+    // initializes with zeros
+    print_CUDA_error_if_any(cudaMemset(mp->d_hess_el_kl,0,
+                                       NGLL3*mp->NSPEC_AB*sizeof(realw)),5451);
+  }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  exit_on_cuda_error("prepare_fields_elastic_adj_dev");
+#endif
+}
+*/
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// purely adjoint & kernel simulations
+
+/* ----------------------------------------------------------------------------------------------- */
+
+/*
+// Prepares constant arrays needed by pure adjoint (SIMULATION_TYPE == 2) and
+// kernel (SIMULATION_TYPE == 3) runs: the receiver slice assignment table and,
+// when this rank owns adjoint receivers, the adjoint source staging arrays plus
+// a precomputed local->global receiver index lookup.
+extern "C"
+void FC_FUNC_(prepare_sim2_or_3_const_device,
+              PREPARE_SIM2_OR_3_CONST_DEVICE)(
+                                              long* Mesh_pointer_f,
+                                              int* islice_selected_rec,
+                                              int* islice_selected_rec_size,
+                                              int* nadj_rec_local,
+                                              int* nrec,
+                                              int* myrank) {
+
+  TRACE("prepare_sim2_or_3_const_device");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+
+  // receiver slice assignments: allocate on device and transfer from host
+  print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_islice_selected_rec,
+                                     *islice_selected_rec_size*sizeof(int)),6001);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_islice_selected_rec,islice_selected_rec,
+                                     *islice_selected_rec_size*sizeof(int),cudaMemcpyHostToDevice),6002);
+
+  // adjoint source arrays (only ranks that hold adjoint receivers need them)
+  mp->nadj_rec_local = *nadj_rec_local;
+  if( mp->nadj_rec_local > 0 ){
+    print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_adj_sourcearrays,
+                                       (mp->nadj_rec_local)*3*NGLL3*sizeof(realw)),6003);
+
+    print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_pre_computed_irec,
+                                       (mp->nadj_rec_local)*sizeof(int)),6004);
+
+    // builds the local->global receiver index lookup on the host:
+    // the local counter is incremented serially while scanning all receivers,
+    // which is why this mapping cannot be computed inside a parallel kernel
+    int* h_pre_computed_irec = (int*) malloc( (mp->nadj_rec_local)*sizeof(int) );
+    if( h_pre_computed_irec == NULL ) exit_on_error("prepare_sim2_or_3_const_device: h_pre_computed_irec not allocated\n");
+
+    int nfound = 0;
+    for(int irec = 0; irec < *nrec; irec++) {
+      if( islice_selected_rec[irec] == *myrank ) h_pre_computed_irec[nfound++] = irec;
+    }
+    // consistency check against the count supplied by the caller
+    if( nfound != mp->nadj_rec_local ) exit_on_error("prepare_sim2_or_3_const_device: irec_local not equal\n");
+
+    // copies lookup table onto GPU
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_pre_computed_irec,h_pre_computed_irec,
+                                       (mp->nadj_rec_local)*sizeof(int),cudaMemcpyHostToDevice),6010);
+    free(h_pre_computed_irec);
+
+    // host-side staging buffer used to extract adjoint source values per slice
+    mp->h_adj_sourcearrays_slice = (realw*) malloc( (mp->nadj_rec_local)*3*NGLL3*sizeof(realw) );
+    if( mp->h_adj_sourcearrays_slice == NULL ) exit_on_error("h_adj_sourcearrays_slice not allocated\n");
+  }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  exit_on_cuda_error("prepare_sim2_or_3_const_device");
+#endif
+}
+*/
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// for NOISE simulations
+
+/* ----------------------------------------------------------------------------------------------- */
+
+/*
+// Prepares noise-simulation arrays on the GPU, called once before the time loop:
+// free-surface mesh arrays and a surface movie buffer (always), the noise source
+// time function array (NOISE_TOMOGRAPHY == 1), noise direction/mask/jacobian
+// arrays (NOISE_TOMOGRAPHY > 1), and the zero-initialized noise strength kernel
+// Sigma_kl (NOISE_TOMOGRAPHY == 3). Error tags 7xxx identify the failing call.
+extern "C"
+void FC_FUNC_(prepare_fields_noise_device,
+              PREPARE_FIELDS_NOISE_DEVICE)(long* Mesh_pointer_f,
+                                           int* NSPEC_AB, int* NGLOB_AB,
+                                           int* free_surface_ispec,
+                                           int* free_surface_ijk,
+                                           int* num_free_surface_faces,
+                                           int* SIMULATION_TYPE,
+                                           int* NOISE_TOMOGRAPHY,
+                                           int* NSTEP,
+                                           realw* noise_sourcearray,
+                                           realw* normal_x_noise,
+                                           realw* normal_y_noise,
+                                           realw* normal_z_noise,
+                                           realw* mask_noise,
+                                           realw* free_surface_jacobian2Dw) {
+
+  TRACE("prepare_fields_noise_device");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+
+  // free surface
+  mp->num_free_surface_faces = *num_free_surface_faces;
+
+  // free-surface element indices and local GLL indices (3 components per NGLL2 point)
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_free_surface_ispec,
+                                     mp->num_free_surface_faces*sizeof(int)),7001);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ispec, free_surface_ispec,
+                                     mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),7002);
+
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_free_surface_ijk,
+                                     3*NGLL2*mp->num_free_surface_faces*sizeof(int)),7003);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ijk,free_surface_ijk,
+                                     3*NGLL2*mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),7004);
+
+  // alloc storage for the surface buffer to be copied
+  // (device-only; filled/read during the time loop, so no host transfer here)
+  print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_noise_surface_movie,
+                                     3*NGLL2*mp->num_free_surface_faces*sizeof(realw)),7005);
+
+  // prepares noise source array
+  // (3 components * NGLL3 points per time step, for all NSTEP steps)
+  if( *NOISE_TOMOGRAPHY == 1 ){
+    print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_noise_sourcearray,
+                                       3*NGLL3*(*NSTEP)*sizeof(realw)),7101);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_noise_sourcearray, noise_sourcearray,
+                                       3*NGLL3*(*NSTEP)*sizeof(realw),cudaMemcpyHostToDevice),7102);
+  }
+
+  // prepares noise directions
+  // (per-point surface normals, noise mask and weighted jacobian)
+  if( *NOISE_TOMOGRAPHY > 1 ){
+    int nface_size = NGLL2*(*num_free_surface_faces);
+    // allocates memory on GPU
+    print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_x_noise,
+                                       nface_size*sizeof(realw)),7301);
+    print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_y_noise,
+                                       nface_size*sizeof(realw)),7302);
+    print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_z_noise,
+                                       nface_size*sizeof(realw)),7303);
+    print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_mask_noise,
+                                       nface_size*sizeof(realw)),7304);
+    print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_free_surface_jacobian2Dw,
+                                       nface_size*sizeof(realw)),7305);
+    // transfers data onto GPU
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_x_noise, normal_x_noise,
+                                       nface_size*sizeof(realw),cudaMemcpyHostToDevice),7306);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_y_noise, normal_y_noise,
+                                       nface_size*sizeof(realw),cudaMemcpyHostToDevice),7307);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_z_noise, normal_z_noise,
+                                       nface_size*sizeof(realw),cudaMemcpyHostToDevice),7308);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_mask_noise, mask_noise,
+                                       nface_size*sizeof(realw),cudaMemcpyHostToDevice),7309);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_jacobian2Dw, free_surface_jacobian2Dw,
+                                       nface_size*sizeof(realw),cudaMemcpyHostToDevice),7310);
+  }
+
+  // prepares noise strength kernel
+  if( *NOISE_TOMOGRAPHY == 3 ){
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_Sigma_kl),
+                                       NGLL3*(mp->NSPEC_AB)*sizeof(realw)),7401);
+    // initializes kernel values to zero
+    print_CUDA_error_if_any(cudaMemset(mp->d_Sigma_kl,0,
+                                       NGLL3*mp->NSPEC_AB*sizeof(realw)),7403);
+
+  }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  //printf("jacobian_size = %d\n",25*(*num_free_surface_faces));
+  exit_on_cuda_error("prepare_fields_noise_device");
+#endif
+}
+*/
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// cleanup
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(prepare_cleanup_device,
+ PREPARE_CLEANUP_DEVICE)(long* Mesh_pointer_f) {
+
+TRACE("prepare_cleanup_device");
+
+ // frees allocated memory arrays
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+
+ // frees memory on GPU
+ // crust_mantle
+ cudaFree(mp->d_xix_crust_mantle);
+ cudaFree(mp->d_xiy_crust_mantle);
+ cudaFree(mp->d_xiz_crust_mantle);
+ cudaFree(mp->d_etax_crust_mantle);
+ cudaFree(mp->d_etay_crust_mantle);
+ cudaFree(mp->d_etaz_crust_mantle);
+ cudaFree(mp->d_gammax_crust_mantle);
+ cudaFree(mp->d_gammay_crust_mantle);
+ cudaFree(mp->d_gammaz_crust_mantle);
+
+ cudaFree(mp->d_muvstore_crust_mantle);
+ cudaFree(mp->d_ibool_crust_mantle);
+
+ if( ! mp->anisotropic_3D_mantle ){
+ cudaFree(mp->d_kappavstore_crust_mantle);
+ cudaFree(mp->d_kappahstore_crust_mantle);
+ cudaFree(mp->d_muhstore_crust_mantle);
+ cudaFree(mp->d_eta_anisostore_crust_mantle);
+ cudaFree(mp->d_ispec_is_tiso_crust_mantle);
+ }else{
+ cudaFree(mp->d_c11store_crust_mantle);
+ cudaFree(mp->d_c12store_crust_mantle);
+ cudaFree(mp->d_c13store_crust_mantle);
+ cudaFree(mp->d_c14store_crust_mantle);
+ cudaFree(mp->d_c15store_crust_mantle);
+ cudaFree(mp->d_c16store_crust_mantle);
+ cudaFree(mp->d_c22store_crust_mantle);
+ cudaFree(mp->d_c23store_crust_mantle);
+ cudaFree(mp->d_c24store_crust_mantle);
+ cudaFree(mp->d_c25store_crust_mantle);
+ cudaFree(mp->d_c26store_crust_mantle);
+ cudaFree(mp->d_c33store_crust_mantle);
+ cudaFree(mp->d_c34store_crust_mantle);
+ cudaFree(mp->d_c35store_crust_mantle);
+ cudaFree(mp->d_c36store_crust_mantle);
+ cudaFree(mp->d_c44store_crust_mantle);
+ cudaFree(mp->d_c45store_crust_mantle);
+ cudaFree(mp->d_c46store_crust_mantle);
+ cudaFree(mp->d_c55store_crust_mantle);
+ cudaFree(mp->d_c56store_crust_mantle);
+ cudaFree(mp->d_c66store_crust_mantle);
+ }
+
+ if( mp->simulation_type == 3 && mp->save_boundary_mesh ){
+ cudaFree(mp->d_rhostore_crust_mantle);
+ }
+
+ cudaFree(mp->d_ystore_crust_mantle);
+ cudaFree(mp->d_zstore_crust_mantle);
+ if( mp->gravity ){
+ cudaFree(mp->d_xstore_crust_mantle);
+ }
+
+ cudaFree(mp->d_displ_crust_mantle);
+ cudaFree(mp->d_veloc_crust_mantle);
+ cudaFree(mp->d_accel_crust_mantle);
+ if( mp->simulation_type == 3 ){
+ cudaFree(mp->d_b_displ_crust_mantle);
+ cudaFree(mp->d_b_veloc_crust_mantle);
+ cudaFree(mp->d_b_accel_crust_mantle);
+ }
+
+
+ // outer_core
+ cudaFree(mp->d_xix_outer_core);
+ cudaFree(mp->d_xiy_outer_core);
+ cudaFree(mp->d_xiz_outer_core);
+ cudaFree(mp->d_etax_outer_core);
+ cudaFree(mp->d_etay_outer_core);
+ cudaFree(mp->d_etaz_outer_core);
+ cudaFree(mp->d_gammax_outer_core);
+ cudaFree(mp->d_gammay_outer_core);
+ cudaFree(mp->d_gammaz_outer_core);
+
+ cudaFree(mp->d_kappavstore_outer_core);
+ if( mp->simulation_type == 3 ){
+ cudaFree(mp->d_rhostore_outer_core);
+ }
+
+ cudaFree(mp->d_xstore_outer_core);
+ cudaFree(mp->d_ystore_outer_core);
+ cudaFree(mp->d_zstore_outer_core);
+ cudaFree(mp->d_ibool_outer_core);
+
+ cudaFree(mp->d_displ_outer_core);
+ cudaFree(mp->d_veloc_outer_core);
+ cudaFree(mp->d_accel_outer_core);
+ if( mp->simulation_type == 3 ){
+ cudaFree(mp->d_b_displ_outer_core);
+ cudaFree(mp->d_b_veloc_outer_core);
+ cudaFree(mp->d_b_accel_outer_core);
+ }
+
+ // inner_core
+ cudaFree(mp->d_xix_inner_core);
+ cudaFree(mp->d_xiy_inner_core);
+ cudaFree(mp->d_xiz_inner_core);
+ cudaFree(mp->d_etax_inner_core);
+ cudaFree(mp->d_etay_inner_core);
+ cudaFree(mp->d_etaz_inner_core);
+ cudaFree(mp->d_gammax_inner_core);
+ cudaFree(mp->d_gammay_inner_core);
+ cudaFree(mp->d_gammaz_inner_core);
+
+
+ cudaFree(mp->d_muvstore_inner_core);
+ cudaFree(mp->d_ibool_inner_core);
+
+ if( mp->gravity ){
+ cudaFree(mp->d_xstore_inner_core);
+ cudaFree(mp->d_ystore_inner_core);
+ cudaFree(mp->d_zstore_inner_core);
+ }
+
+ if( ! mp->anisotropic_inner_core ){
+ cudaFree(mp->d_kappavstore_inner_core);
+ }else{
+ cudaFree(mp->d_c11store_inner_core);
+ cudaFree(mp->d_c12store_inner_core);
+ cudaFree(mp->d_c13store_inner_core);
+ cudaFree(mp->d_c33store_inner_core);
+ cudaFree(mp->d_c44store_inner_core);
+ }
+
+ if( mp->simulation_type == 3 && mp->save_boundary_mesh ){
+ cudaFree(mp->d_rhostore_inner_core);
+ }
+
+ cudaFree(mp->d_displ_inner_core);
+ cudaFree(mp->d_veloc_inner_core);
+ cudaFree(mp->d_accel_inner_core);
+ if( mp->simulation_type == 3 ) {
+ cudaFree(mp->d_b_displ_inner_core);
+ cudaFree(mp->d_b_veloc_inner_core);
+ cudaFree(mp->d_b_accel_inner_core);
+ }
+
+ // sources
+ if( mp->simulation_type == 1 || mp->simulation_type == 3 ){
+ cudaFree(mp->d_sourcearrays);
+ cudaFree(mp->d_stf_pre_compute);
+ }
+
+ cudaFree(mp->d_islice_selected_source);
+ cudaFree(mp->d_ispec_selected_source);
+
+ // receivers
+ if( mp->nrec_local > 0 ) cudaFree(mp->d_number_receiver_global);
+ cudaFree(mp->d_ispec_selected_rec);
+
+ // rotation arrays
+ if( mp->rotation ){
+ cudaFree(mp->d_A_array_rotation);
+ cudaFree(mp->d_B_array_rotation);
+ if( mp->simulation_type == 3 ){
+ cudaFree(mp->d_b_A_array_rotation);
+ cudaFree(mp->d_b_B_array_rotation);
+ }
+ }
+
+ // gravity arrays
+ if( ! mp->gravity ){
+ cudaFree(mp->d_d_ln_density_dr_table);
+ }else{
+ cudaFree(mp->d_minus_rho_g_over_kappa_fluid);
+ cudaFree(mp->d_minus_gravity_table);
+ cudaFree(mp->d_minus_deriv_gravity_table);
+ cudaFree(mp->d_density_table);
+ }
+
+ // attenuation arrays
+ if( mp->attenuation ){
+ cudaFree(mp->d_one_minus_sum_beta_crust_mantle);
+ cudaFree(mp->d_one_minus_sum_beta_inner_core);
+ if( ! mp->use_attenuation_mimic ){
+ cudaFree(mp->d_R_xx_crust_mantle);
+ cudaFree(mp->d_R_yy_crust_mantle);
+ cudaFree(mp->d_R_xy_crust_mantle);
+ cudaFree(mp->d_R_xz_crust_mantle);
+ cudaFree(mp->d_R_yz_crust_mantle);
+ cudaFree(mp->d_factor_common_crust_mantle);
+ cudaFree(mp->d_R_xx_inner_core);
+ cudaFree(mp->d_R_yy_inner_core);
+ cudaFree(mp->d_R_xy_inner_core);
+ cudaFree(mp->d_R_xz_inner_core);
+ cudaFree(mp->d_R_yz_inner_core);
+ cudaFree(mp->d_factor_common_inner_core);
+ }
+ cudaFree(mp->d_alphaval);
+ cudaFree(mp->d_betaval);
+ cudaFree(mp->d_gammaval);
+ if( mp->simulation_type == 3 ){
+ cudaFree(mp->d_b_alphaval);
+ cudaFree(mp->d_b_betaval);
+ cudaFree(mp->d_b_gammaval);
+ }
+ }
+
+ // strain
+ if( mp->compute_and_store_strain ){
+ cudaFree(mp->d_epsilondev_xx_crust_mantle);
+ cudaFree(mp->d_epsilondev_yy_crust_mantle);
+ cudaFree(mp->d_epsilondev_xy_crust_mantle);
+ cudaFree(mp->d_epsilondev_xz_crust_mantle);
+ cudaFree(mp->d_epsilondev_yz_crust_mantle);
+
+ cudaFree(mp->d_epsilondev_xx_inner_core);
+ cudaFree(mp->d_epsilondev_yy_inner_core);
+ cudaFree(mp->d_epsilondev_xy_inner_core);
+ cudaFree(mp->d_epsilondev_xz_inner_core);
+ cudaFree(mp->d_epsilondev_yz_inner_core);
+
+ cudaFree(mp->d_eps_trace_over_3_crust_mantle);
+ cudaFree(mp->d_eps_trace_over_3_inner_core);
+ if( mp->simulation_type == 3 ){
+ cudaFree(mp->d_b_epsilondev_xx_crust_mantle);
+ cudaFree(mp->d_b_epsilondev_yy_crust_mantle);
+ cudaFree(mp->d_b_epsilondev_xy_crust_mantle);
+ cudaFree(mp->d_b_epsilondev_xz_crust_mantle);
+ cudaFree(mp->d_b_epsilondev_yz_crust_mantle);
+
+ cudaFree(mp->d_b_epsilondev_xx_inner_core);
+ cudaFree(mp->d_b_epsilondev_yy_inner_core);
+ cudaFree(mp->d_b_epsilondev_xy_inner_core);
+ cudaFree(mp->d_b_epsilondev_xz_inner_core);
+ cudaFree(mp->d_b_epsilondev_yz_inner_core);
+
+ cudaFree(mp->d_b_eps_trace_over_3_crust_mantle);
+ cudaFree(mp->d_b_eps_trace_over_3_inner_core);
+ }
+ }
+
+
+/*
+ // absorbing boundaries
+ if( *ABSORBING_CONDITIONS && mp->d_num_abs_boundary_faces > 0 ){
+ cudaFree(mp->d_abs_boundary_ispec);
+ cudaFree(mp->d_abs_boundary_ijk);
+ cudaFree(mp->d_abs_boundary_normal);
+ cudaFree(mp->d_abs_boundary_jacobian2Dw);
+ }
+
+ // interfaces
+ cudaFree(mp->d_nibool_interfaces_ext_mesh);
+ cudaFree(mp->d_ibool_interfaces_ext_mesh);
+
+ // global indexing
+ cudaFree(mp->d_ispec_is_inner);
+ cudaFree(mp->d_ibool);
+
+ // ACOUSTIC arrays
+ if( *ACOUSTIC_SIMULATION ){
+ cudaFree(mp->d_potential_acoustic);
+ cudaFree(mp->d_potential_dot_acoustic);
+ cudaFree(mp->d_potential_dot_dot_acoustic);
+ cudaFree(mp->d_send_potential_dot_dot_buffer);
+ cudaFree(mp->d_rmass_acoustic);
+ cudaFree(mp->d_rhostore);
+ cudaFree(mp->d_kappastore);
+ cudaFree(mp->d_phase_ispec_inner_acoustic);
+ cudaFree(mp->d_ispec_is_acoustic);
+
+ if( *NOISE_TOMOGRAPHY == 0 ){
+ cudaFree(mp->d_free_surface_ispec);
+ cudaFree(mp->d_free_surface_ijk);
+ }
+
+ if( *ABSORBING_CONDITIONS ) cudaFree(mp->d_b_absorb_potential);
+
+ if( *SIMULATION_TYPE == 3 ) {
+ cudaFree(mp->d_b_potential_acoustic);
+ cudaFree(mp->d_b_potential_dot_acoustic);
+ cudaFree(mp->d_b_potential_dot_dot_acoustic);
+ cudaFree(mp->d_rho_ac_kl);
+ cudaFree(mp->d_kappa_ac_kl);
+ if( *APPROXIMATE_HESS_KL) cudaFree(mp->d_hess_ac_kl);
+ }
+
+
+ if(mp->nrec_local > 0 ){
+ cudaFree(mp->d_station_seismo_potential);
+ free(mp->h_station_seismo_potential);
+ }
+
+ } // ACOUSTIC_SIMULATION
+
+ // ELASTIC arrays
+ if( *ELASTIC_SIMULATION ){
+ cudaFree(mp->d_displ);
+ cudaFree(mp->d_veloc);
+ cudaFree(mp->d_accel);
+ cudaFree(mp->d_send_accel_buffer);
+ cudaFree(mp->d_rmass);
+
+ cudaFree(mp->d_phase_ispec_inner_elastic);
+ cudaFree(mp->d_ispec_is_elastic);
+
+ if( mp->nrec_local > 0 ){
+ cudaFree(mp->d_station_seismo_field);
+ free(mp->h_station_seismo_field);
+ }
+
+ if( *ABSORBING_CONDITIONS && mp->d_num_abs_boundary_faces > 0){
+ cudaFree(mp->d_rho_vp);
+ cudaFree(mp->d_rho_vs);
+
+ if(*SIMULATION_TYPE == 3 || ( *SIMULATION_TYPE == 1 && *SAVE_FORWARD ))
+ cudaFree(mp->d_b_absorb_field);
+ }
+
+ if( *SIMULATION_TYPE == 3 ) {
+ cudaFree(mp->d_b_displ);
+ cudaFree(mp->d_b_veloc);
+ cudaFree(mp->d_b_accel);
+ cudaFree(mp->d_rho_kl);
+ cudaFree(mp->d_mu_kl);
+ cudaFree(mp->d_kappa_kl);
+ if( *APPROXIMATE_HESS_KL ) cudaFree(mp->d_hess_el_kl);
+ }
+
+ if( *COMPUTE_AND_STORE_STRAIN ){
+ cudaFree(mp->d_epsilondev_xx);
+ cudaFree(mp->d_epsilondev_yy);
+ cudaFree(mp->d_epsilondev_xy);
+ cudaFree(mp->d_epsilondev_xz);
+ cudaFree(mp->d_epsilondev_yz);
+ if( *SIMULATION_TYPE == 3 ){
+ cudaFree(mp->d_epsilon_trace_over_3);
+ cudaFree(mp->d_b_epsilon_trace_over_3);
+ cudaFree(mp->d_b_epsilondev_xx);
+ cudaFree(mp->d_b_epsilondev_yy);
+ cudaFree(mp->d_b_epsilondev_xy);
+ cudaFree(mp->d_b_epsilondev_xz);
+ cudaFree(mp->d_b_epsilondev_yz);
+ }
+ }
+
+ if( *ATTENUATION ){
+ cudaFree(mp->d_factor_common);
+ cudaFree(mp->d_one_minus_sum_beta);
+ cudaFree(mp->d_alphaval);
+ cudaFree(mp->d_betaval);
+ cudaFree(mp->d_gammaval);
+ cudaFree(mp->d_R_xx);
+ cudaFree(mp->d_R_yy);
+ cudaFree(mp->d_R_xy);
+ cudaFree(mp->d_R_xz);
+ cudaFree(mp->d_R_yz);
+ if( *SIMULATION_TYPE == 3){
+ cudaFree(mp->d_b_R_xx);
+ cudaFree(mp->d_b_R_yy);
+ cudaFree(mp->d_b_R_xy);
+ cudaFree(mp->d_b_R_xz);
+ cudaFree(mp->d_b_R_yz);
+ cudaFree(mp->d_b_alphaval);
+ cudaFree(mp->d_b_betaval);
+ cudaFree(mp->d_b_gammaval);
+ }
+ }
+
+ if( *ANISOTROPY ){
+ cudaFree(mp->d_c11store);
+ cudaFree(mp->d_c12store);
+ cudaFree(mp->d_c13store);
+ cudaFree(mp->d_c14store);
+ cudaFree(mp->d_c15store);
+ cudaFree(mp->d_c16store);
+ cudaFree(mp->d_c22store);
+ cudaFree(mp->d_c23store);
+ cudaFree(mp->d_c24store);
+ cudaFree(mp->d_c25store);
+ cudaFree(mp->d_c26store);
+ cudaFree(mp->d_c33store);
+ cudaFree(mp->d_c34store);
+ cudaFree(mp->d_c35store);
+ cudaFree(mp->d_c36store);
+ cudaFree(mp->d_c44store);
+ cudaFree(mp->d_c45store);
+ cudaFree(mp->d_c46store);
+ cudaFree(mp->d_c55store);
+ cudaFree(mp->d_c56store);
+ cudaFree(mp->d_c66store);
+ }
+
+ if( *OCEANS ){
+ if( mp->num_free_surface_faces > 0 ){
+ cudaFree(mp->d_rmass_ocean_load);
+ cudaFree(mp->d_free_surface_normal);
+ cudaFree(mp->d_updated_dof_ocean_load);
+ if( *NOISE_TOMOGRAPHY == 0){
+ cudaFree(mp->d_free_surface_ispec);
+ cudaFree(mp->d_free_surface_ijk);
+ }
+ }
+ }
+ } // ELASTIC_SIMULATION
+
+ // purely adjoint & kernel array
+ if( *SIMULATION_TYPE == 2 || *SIMULATION_TYPE == 3 ){
+ cudaFree(mp->d_islice_selected_rec);
+ if(mp->nadj_rec_local > 0 ){
+ cudaFree(mp->d_adj_sourcearrays);
+ cudaFree(mp->d_pre_computed_irec);
+ free(mp->h_adj_sourcearrays_slice);
+ }
+ }
+
+ // NOISE arrays
+ if( *NOISE_TOMOGRAPHY > 0 ){
+ cudaFree(mp->d_free_surface_ispec);
+ cudaFree(mp->d_free_surface_ijk);
+ cudaFree(mp->d_noise_surface_movie);
+ if( *NOISE_TOMOGRAPHY == 1 ) cudaFree(mp->d_noise_sourcearray);
+ if( *NOISE_TOMOGRAPHY > 1 ){
+ cudaFree(mp->d_normal_x_noise);
+ cudaFree(mp->d_normal_y_noise);
+ cudaFree(mp->d_normal_z_noise);
+ cudaFree(mp->d_mask_noise);
+ cudaFree(mp->d_free_surface_jacobian2Dw);
+ }
+ if( *NOISE_TOMOGRAPHY == 3 ) cudaFree(mp->d_Sigma_kl);
+ }
+
+*/
+
+ // mesh pointer - not needed anymore
+ free(mp);
+}
+
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/save_and_compare_cpu_vs_gpu.c
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/save_and_compare_cpu_vs_gpu.c (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/save_and_compare_cpu_vs_gpu.c 2012-02-14 15:11:07 UTC (rev 19622)
@@ -0,0 +1,316 @@
+/*
+ !=====================================================================
+ !
+ ! S p e c f e m 3 D V e r s i o n 2 . 0
+ ! ---------------------------------------
+ !
+ ! Main authors: Dimitri Komatitsch and Jeroen Tromp
+ ! Princeton University, USA and University of Pau / CNRS / INRIA
+ ! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+ ! April 2011
+ !
+ ! This program is free software; you can redistribute it and/or modify
+ ! it under the terms of the GNU General Public License as published by
+ ! the Free Software Foundation; either version 2 of the License, or
+ ! (at your option) any later version.
+ !
+ ! This program is distributed in the hope that it will be useful,
+ ! but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ ! GNU General Public License for more details.
+ !
+ ! You should have received a copy of the GNU General Public License along
+ ! with this program; if not, write to the Free Software Foundation, Inc.,
+ ! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ !
+ !=====================================================================
+ */
+
#include <errno.h>
#include <float.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+
+#ifdef WITH_MPI
+#include <mpi.h>
+#endif
+
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+
/* Append one max-value sample (text, "%e") to maxval_surface_proc_<rank>.dat
 * in the current directory.  maxval: value to record.  Rank comes from
 * MPI_COMM_WORLD when built with MPI, otherwise 0. */
void save_to_max_surface_file_(float* maxval) {
  int rank;
  char filename[BUFSIZ];
  FILE* fp;
#ifdef WITH_MPI
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
#else
  rank = 0;
#endif
  sprintf(filename,"maxval_surface_proc_%03d.dat",rank);
  fp = fopen(filename,"a+");
  if(fp == NULL) {
    // was: unchecked fopen -> NULL dereference in fprintf on failure
    fprintf(stderr,"Error opening %s: %s\n",filename,strerror(errno));
    return;
  }
  fprintf(fp,"%e\n",*maxval);
  fclose(fp);
}
+
/* Dump a float vector in raw binary to debug_output_{cpu,gpu}_<id>.dat.
 * vector: data; size: element count; id: file tag;
 * cpu_or_gpu: 0 -> "cpu" file, otherwise "gpu" file. */
void save_fvector_(float* vector, int* size, int* id, int* cpu_or_gpu) {
  FILE* fp;
  char filename[BUFSIZ];
  if(*cpu_or_gpu == 0) {
    sprintf(filename, "debug_output_cpu_%d.dat",*id);
  }
  else {
    sprintf(filename, "debug_output_gpu_%d.dat",*id);
  }
  fp = fopen(filename, "wb");
  if(fp == NULL) {
    // was: unchecked fopen -> NULL dereference in fwrite on failure
    fprintf(stderr,"Error opening %s: %s\n",filename,strerror(errno));
    return;
  }
  // guard: the original read vector[0] unconditionally, an out-of-bounds
  // access when *size == 0
  if(*size > 0) printf("writing vector, vector[0]=%e\n",vector[0]);
  fwrite(vector, sizeof(float), *size, fp);
  fclose(fp);
}
+
/* Dump an int vector in raw binary to debug_output_{cpu,gpu}_<id>.dat.
 * Same naming convention as save_fvector_: cpu_or_gpu == 0 -> "cpu" file. */
void save_ivector_(int* vector, int* size, int* id, int* cpu_or_gpu) {
  FILE* fp;
  char filename[BUFSIZ];
  if(*cpu_or_gpu == 0) {
    sprintf(filename, "debug_output_cpu_%d.dat",*id);
  }
  else {
    sprintf(filename, "debug_output_gpu_%d.dat",*id);
  }
  fp = fopen(filename, "wb");
  if(fp == NULL) {
    // was: unchecked fopen -> NULL dereference in fwrite on failure
    fprintf(stderr,"Error opening %s: %s\n",filename,strerror(errno));
    return;
  }
  fwrite(vector, sizeof(int), *size, fp);
  fclose(fp);
}
+
+
/* Scan a binary surface-movie file and print the maximum float seen.
 * nodes_per_iterationf: floats per time step; NSTEP: number of time steps.
 * NOTE(review): input path is hard-coded to one user's scratch space --
 * debugging aid only. */
void get_max_from_surface_file_(int* nodes_per_iterationf,int* NSTEP) {
  int nodes_per_iteration = *nodes_per_iterationf;
  char filename[BUFSIZ];
  int procid;
#ifdef WITH_MPI
  MPI_Comm_rank(MPI_COMM_WORLD,&procid);
#else
  procid = 0;
#endif
  sprintf(filename,"/scratch/eiger/rietmann/SPECFEM3D_AIGLE/in_out_files/DATABASES_MPI/proc%06d_surface_movie",procid);

  FILE* fp; int it;
  printf("Opening %s for analysis\n",filename);
  fp = fopen(filename,"rb");
  if(fp == NULL) {
    printf("FILE ERROR:%s\n",strerror(errno));
    perror("file error\n");
    exit(1);
  }

  float* vector = (float*)malloc(nodes_per_iteration*sizeof(float));
  if(vector == NULL) {
    // was: unchecked malloc
    fclose(fp);
    printf("MALLOC ERROR\n");
    exit(1);
  }
  // was: max_val read before any assignment -- undefined behavior;
  // -FLT_MAX is the identity for a running float maximum
  float max_val = -FLT_MAX;
  int i;
  for(it=0;it<*NSTEP;it++) {
    // long arithmetic so large files do not overflow the byte offset
    long pos = (long)sizeof(float)*nodes_per_iteration*(long)it;
    fseek(fp,pos,SEEK_SET);
    // stop (rather than scan stale buffer contents) on a short read
    if( fread(vector,sizeof(float),nodes_per_iteration,fp) != (size_t)nodes_per_iteration ) break;
    for(i=0;i<nodes_per_iteration;i++) {
      max_val = MAX(max_val,vector[i]);
    }
    if(it % 500 == 0) {
      printf("scanning it=%d\n",it);
    }
  }
  printf("max_val=%e\n",max_val);
  free(vector);   // was leaked
  fclose(fp);     // was leaked
}
+
/* Count positions where the two float vectors differ exactly (bitwise
 * inequality via ==).  The first nine mismatches are echoed to stdout,
 * and the total is returned through num_errors. */
void compare_two_vectors_exact_(int* sizef,float* vector1,float* vector2,int* num_errors) {

  int n = *sizef;
  int mismatches = 0;
  int idx;

  for(idx = 0; idx < n; idx++) {
    if(vector1[idx] == vector2[idx]) continue;   // identical entry: nothing to report
    mismatches++;
    if(mismatches < 10) {
      printf("err[%d]: %e != %e\n",idx,vector1[idx],vector2[idx]);
    }
  }
  printf("**** Error Count: %d ****\n",mismatches);
  *num_errors = mismatches;
}
+
/* Count positions where vector2 deviates from the reference vector1 by more
 * than 1% relative error (and more than 1e-20 absolute, to ignore noise on
 * tiny values).  Entries whose reference is exactly zero are skipped, since
 * relative error is undefined there.  First nine errors are echoed; the
 * total is returned through num_errors. */
void compare_two_vectors_(int* sizef,float* vector1,float* vector2,int* num_errors) {

  int size = *sizef;
  int i;
  int error_count = 0;

  for(i=0;i<size;++i) {
    if(vector1[i] == 0.f) continue;          // reference zero: relative error undefined
    float diff = fabsf(vector1[i]-vector2[i]);
    // was: diff/vector1[i] -- a negative reference made the quotient negative,
    // so mismatches against negative values were never flagged; compare
    // against |vector1[i]| instead
    if( diff/fabsf(vector1[i]) > 0.01 && diff > 1e-20 ) {
      error_count++;
      if(error_count<10) {
        printf("err[%d]: %e != %e\n",i,vector1[i],vector2[i]);
      }
    }
  }
  printf("**** Error Count: %d ****\n",error_count);
  *num_errors = error_count;
}
+
/* Frame-by-frame comparison of two surface-movie files written by the CPU
 * and GPU code paths.  bytes_per_iteration: bytes per frame;
 * number_of_iterations: frames to scan (only the first 100 are compared).
 * NOTE(review): input paths are hard-coded to one user's scratch space and
 * the routine terminates the program when done -- debugging aid only. */
void compare_surface_files_(int* bytes_per_iteration, int* number_of_iterations) {

  char* cpu_file = "/scratch/eiger/rietmann/SPECFEM3D/in_out_files/DATABASES_MPI/cpu_proc000001_surface_movie";
  char* gpu_file = "/scratch/eiger/rietmann/SPECFEM3D/in_out_files/DATABASES_MPI/cpu_v2_proc000001_surface_movie";

  FILE* fp_cpu = fopen(cpu_file,"rb");
  if(fp_cpu == NULL) {
    printf("CPU FILE ERROR:%s\n",strerror(errno));
    perror("cpu file error\n");
    exit(1);   // was: kept going with a NULL stream and crashed in fseek/fread
  }

  FILE* fp_gpu = fopen(gpu_file,"rb");
  if(fp_gpu == NULL) {
    printf("GPU FILE ERROR:%s\n",strerror(errno));
    perror("gpu file error\n");
    fclose(fp_cpu);
    exit(1);   // was: kept going with a NULL stream
  }

  float* gpu_vector = (float*)malloc(*bytes_per_iteration);
  float* cpu_vector = (float*)malloc(*bytes_per_iteration);
  if(gpu_vector == NULL || cpu_vector == NULL) {
    // was: unchecked malloc
    printf("MALLOC ERROR\n");
    exit(1);
  }

  int number_of_nodes = *bytes_per_iteration/sizeof(float);
  int i,it,error_count=0;
  for(it=0;it<*number_of_iterations;it++) {
    // long arithmetic so large files do not overflow the byte offset
    long pos = (long)(*bytes_per_iteration)*(long)it;

    fseek(fp_cpu,pos,SEEK_SET);
    fseek(fp_gpu,pos,SEEK_SET);

    // stop on a short read instead of comparing stale buffer contents
    if( fread(cpu_vector,sizeof(float),number_of_nodes,fp_cpu) != (size_t)number_of_nodes ) break;
    if( fread(gpu_vector,sizeof(float),number_of_nodes,fp_gpu) != (size_t)number_of_nodes ) break;

    int size = number_of_nodes;
    float gpu_min_val=10;
    float gpu_max_val=0;
    float cpu_min_val=10;
    float cpu_max_val=0;
    if(it<100) {
      for(i=0;i<size;i++) {
        // 1e-31 in the denominator keeps near-zero references from dividing by zero
        if((fabs(cpu_vector[i] - gpu_vector[i])/(fabs(cpu_vector[i])+1e-31) > 0.01)) {
          if(error_count < 30) printf("ERROR[%d]: %g != %g\n",i,cpu_vector[i], gpu_vector[i]);
          if(cpu_vector[i] > 1e-30) error_count++;
        }
        if(gpu_vector[i]>gpu_max_val) gpu_max_val = gpu_vector[i];
        if(gpu_vector[i]<gpu_min_val) gpu_min_val = gpu_vector[i];
        if(cpu_vector[i]>cpu_max_val) cpu_max_val = cpu_vector[i];
        if(cpu_vector[i]<cpu_min_val) cpu_min_val = cpu_vector[i];
      }
      printf("%d Total Errors\n",error_count);
      printf("size:%d\n",size);
      printf("GPU:[min/max]=%e/%e\n",gpu_min_val,gpu_max_val);
      printf("CPU:[min/max]=%e/%e\n",cpu_min_val,cpu_max_val);
    }
  }

  free(cpu_vector);   // was leaked
  free(gpu_vector);
  fclose(fp_cpu);     // was leaked
  fclose(fp_gpu);
  printf("End of Surface Compare\n");
  exit(1);   // NOTE(review): unconditional exit is the original behavior -- kept
}
+
+
/* Compare an in-memory float vector against the counterpart dump written by
 * save_fvector_ on the other device (cpu_or_gpu == 0 reads the gpu file and
 * vice versa).  Flags entries deviating by more than 0.01% relative error,
 * echoes the first 30, and prints the total. */
void compare_fvector_(float* vector, int* size, int* id, int* cpu_or_gpu) {
  FILE* fp;
  char cmp_filename[BUFSIZ];
  float* compare_vector = (float*)malloc(*size*sizeof(float));
  if(compare_vector == NULL) {
    // was: unchecked malloc
    printf("MALLOC ERROR\n");
    exit(1);
  }
  if(*cpu_or_gpu == 0) { //swap gpu/cpu for compare
    sprintf(cmp_filename, "debug_output_gpu_%d.dat",*id);
  }
  else {
    sprintf(cmp_filename, "debug_output_cpu_%d.dat",*id);
  }
  // was: a stray extra fopen() here leaked one FILE handle per call
  /* read the values */
  if((fp=fopen(cmp_filename, "rb"))==NULL) {
    printf("Cannot open comparison file %s.\n",cmp_filename);
    exit(1);
  }
  if(fread(compare_vector, sizeof(float), *size, fp) != (size_t)*size) {
    if(feof(fp))
      printf("Premature end of file.");
    else
      printf("File read error.");
  }

  fclose(fp);

  int i;
  int error_count=0;
  for(i=0;i<*size;i++) {
    // was: diff/vector[i] -- negative references made the quotient negative
    // and were never flagged; compare against |vector[i]| instead
    if((fabs(vector[i] - compare_vector[i])/fabs(vector[i]) > 0.0001)) {
      if(error_count < 30) {
        printf("ERROR[%d]: %g != %g\n",i,compare_vector[i], vector[i]);
      }
      error_count++;
    }
  }
  printf("%d Total Errors\n",error_count);
  printf("size:%d\n",*size);
  free(compare_vector);   // was leaked
}
+
/* Compare an in-memory int vector against the counterpart dump written by
 * save_ivector_ on the other device (cpu_or_gpu == 0 reads the gpu file and
 * vice versa).  Flags entries deviating by more than 1% relative error,
 * echoes the first 30, and prints the total. */
void compare_ivector_(int* vector, int* size, int* id, int* cpu_or_gpu) {
  FILE* fp;
  char cmp_filename[BUFSIZ];
  int* compare_vector = (int*)malloc(*size*sizeof(int));
  if(compare_vector == NULL) {
    // was: unchecked malloc
    printf("MALLOC ERROR\n");
    exit(1);
  }
  if(*cpu_or_gpu == 0) { //swap gpu/cpu for compare
    sprintf(cmp_filename, "debug_output_gpu_%d.dat",*id);
  }
  else {
    sprintf(cmp_filename, "debug_output_cpu_%d.dat",*id);
  }
  // was: a stray extra fopen() here leaked one FILE handle per call
  /* read the values */
  if((fp=fopen(cmp_filename, "rb"))==NULL) {
    printf("Cannot open comparison file %s.\n",cmp_filename);
    exit(1);
  }
  if(fread(compare_vector, sizeof(int), *size, fp) != (size_t)*size) {
    if(feof(fp))
      printf("Premature end of file.");
    else
      printf("File read error.");
  }

  fclose(fp);

  int i;
  int error_count=0;
  for(i=0;i<*size;i++) {
    // was: integer division abs(diff)/vector[i] (truncating, and dividing by
    // zero for zero references); use floating-point relative error instead
    if( vector[i] != 0 &&
        fabs((double)vector[i] - (double)compare_vector[i])/fabs((double)vector[i]) > 0.01 ) {
      // was: "%g" with int arguments -- undefined behavior; and error_count
      // only incremented while < 30, so the printed total was capped
      if(error_count < 30) printf("ERROR[%d]: %d != %d\n",i,compare_vector[i], vector[i]);
      error_count++;
    }
  }
  printf("%d Total Errors\n",error_count);
  free(compare_vector);   // was leaked
}
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c 2012-02-14 15:11:07 UTC (rev 19622)
@@ -0,0 +1,896 @@
+/*
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "config.h"
+
+typedef float realw;
+
+
+
+//
+// src/cuda/assemble_MPI_scalar_cuda.cu
+//
+
+void FC_FUNC_(transfer_boun_pot_from_device,
+ TRANSFER_BOUN_POT_FROM_DEVICE)(long* Mesh_pointer_f,
+ realw* send_potential_dot_dot_buffer,
+ int* FORWARD_OR_ADJOINT){}
+
+void FC_FUNC_(transfer_asmbl_pot_to_device,
+ TRANSFER_ASMBL_POT_TO_DEVICE)(long* Mesh_pointer,
+ realw* buffer_recv_scalar,
+ int* FORWARD_OR_ADJOINT) {}
+
+
+//
+// src/cuda/assemble_MPI_vector_cuda.cu
+//
+
+void FC_FUNC_(transfer_boun_accel_from_device,
+ TRANSFER_BOUN_ACCEL_FROM_DEVICE)(long* Mesh_pointer_f,
+ realw* send_accel_buffer,
+ int* IREGION,
+ int* FORWARD_OR_ADJOINT){}
+
+void FC_FUNC_(transfer_asmbl_accel_to_device,
+ TRANSFER_ASMBL_ACCEL_TO_DEVICE)(long* Mesh_pointer,
+ realw* buffer_recv_vector,
+ int* IREGION,
+ int* FORWARD_OR_ADJOINT) {}
+
+
+//
+// src/cuda/check_fields_cuda.cu
+//
+
+void FC_FUNC_(pause_for_debug,
+ PAUSE_FOR_DEBUG)() {}
+
+void FC_FUNC_(output_free_device_memory,
+ OUTPUT_FREE_DEVICE_MEMORY)(int* myrank) {}
+
+void FC_FUNC_(get_free_device_memory,
+ get_FREE_DEVICE_MEMORY)(realw* free, realw* used, realw* total ) {}
+
+void FC_FUNC_(check_max_norm_displ_gpu,
+ CHECK_MAX_NORM_DISPL_GPU)(int* size, realw* displ,long* Mesh_pointer_f,int* announceID) {}
+
+void FC_FUNC_(check_max_norm_vector,
+ CHECK_MAX_NORM_VECTOR)(int* size, realw* vector1, int* announceID) {}
+
+void FC_FUNC_(check_max_norm_displ,
+ CHECK_MAX_NORM_DISPL)(int* size, realw* displ, int* announceID) {}
+
+void FC_FUNC_(check_max_norm_b_displ_gpu,
+ CHECK_MAX_NORM_B_DISPL_GPU)(int* size, realw* b_displ,long* Mesh_pointer_f,int* announceID) {}
+
+void FC_FUNC_(check_max_norm_b_accel_gpu,
+ CHECK_MAX_NORM_B_ACCEL_GPU)(int* size, realw* b_accel,long* Mesh_pointer_f,int* announceID) {}
+
+void FC_FUNC_(check_max_norm_b_veloc_gpu,
+ CHECK_MAX_NORM_B_VELOC_GPU)(int* size, realw* b_veloc,long* Mesh_pointer_f,int* announceID) {}
+
+void FC_FUNC_(check_max_norm_b_displ,
+ CHECK_MAX_NORM_B_DISPL)(int* size, realw* b_displ,int* announceID) {}
+
+void FC_FUNC_(check_max_norm_b_accel,
+ CHECK_MAX_NORM_B_ACCEL)(int* size, realw* b_accel,int* announceID) {}
+
+void FC_FUNC_(check_error_vectors,
+ CHECK_ERROR_VECTORS)(int* sizef, realw* vector1,realw* vector2) {}
+
+void FC_FUNC_(get_max_accel,
+ GET_MAX_ACCEL)(int* itf,int* sizef,long* Mesh_pointer) {}
+
+void FC_FUNC_(get_norm_acoustic_from_device,
+ GET_NORM_ACOUSTIC_FROM_DEVICE)(realw* norm,
+ long* Mesh_pointer_f,
+ int* SIMULATION_TYPE) {}
+
+void FC_FUNC_(get_norm_elastic_from_device,
+ GET_NORM_ELASTIC_FROM_DEVICE)(realw* norm,
+ long* Mesh_pointer_f,
+ int* SIMULATION_TYPE) {}
+
+
+//
+// src/cuda/compute_add_sources_acoustic_cuda.cu
+//
+
+void FC_FUNC_(compute_add_sources_ac_cuda,
+ COMPUTE_ADD_SOURCES_AC_CUDA)(long* Mesh_pointer_f,
+ int* phase_is_innerf,
+ int* NSOURCESf,
+ int* SIMULATION_TYPEf,
+ double* h_stf_pre_compute,
+ int* myrankf) {}
+
+void FC_FUNC_(compute_add_sources_ac_s3_cuda,
+ COMPUTE_ADD_SOURCES_AC_s3_CUDA)(long* Mesh_pointer_f,
+ int* phase_is_innerf,
+ int* NSOURCESf,
+ int* SIMULATION_TYPEf,
+ double* h_stf_pre_compute,
+ int* myrankf) {}
+
+void FC_FUNC_(add_sources_ac_sim_2_or_3_cuda,
+ ADD_SOURCES_AC_SIM_2_OR_3_CUDA)(long* Mesh_pointer,
+ realw* h_adj_sourcearrays,
+ int* phase_is_inner,
+ int* h_ispec_is_inner,
+ int* h_ispec_is_acoustic,
+ int* h_ispec_selected_rec,
+ int* myrank,
+ int* nrec,
+ int* time_index,
+ int* h_islice_selected_rec,
+ int* nadj_rec_local,
+ int* NTSTEP_BETWEEN_READ_ADJSRC) {}
+
+
+//
+// src/cuda/compute_add_sources_elastic_cuda.cu
+//
+
+void FC_FUNC_(compute_add_sources_el_cuda,
+ COMPUTE_ADD_SOURCES_EL_CUDA)(long* Mesh_pointer_f,
+ int* phase_is_innerf,
+ int* NSOURCESf,
+ double* h_stf_pre_compute,
+ int* myrankf) {}
+
+void FC_FUNC_(compute_add_sources_el_s3_cuda,
+ COMPUTE_ADD_SOURCES_EL_S3_CUDA)(long* Mesh_pointer,
+ double* h_stf_pre_compute,
+ int* NSOURCESf,
+ int* phase_is_inner,
+ int* myrank) {}
+
+void FC_FUNC_(add_source_master_rec_noise_cu,
+ ADD_SOURCE_MASTER_REC_NOISE_CU)(long* Mesh_pointer_f,
+ int* myrank_f,
+ int* it_f,
+ int* irec_master_noise_f,
+ int* islice_selected_rec) {}
+
+void FC_FUNC_(add_sources_el_sim_type_2_or_3,
+ ADD_SOURCES_EL_SIM_TYPE_2_OR_3)(long* Mesh_pointer,
+ realw* h_adj_sourcearrays,
+ int* phase_is_inner,
+ int* h_ispec_is_inner,
+ int* h_ispec_is_elastic,
+ int* h_ispec_selected_rec,
+ int* myrank,
+ int* nrec,
+ int* time_index,
+ int* h_islice_selected_rec,
+ int* nadj_rec_local,
+ int* NTSTEP_BETWEEN_READ_ADJSRC) {}
+
+
+//
+// src/cuda/compute_coupling_cuda.cu
+//
+
+void FC_FUNC_(compute_coupling_ac_el_cuda,
+ COMPUTE_COUPLING_AC_EL_CUDA)(
+ long* Mesh_pointer_f,
+ int* phase_is_innerf,
+ int* num_coupling_ac_el_facesf,
+ int* SIMULATION_TYPEf) {}
+
+void FC_FUNC_(compute_coupling_el_ac_cuda,
+ COMPUTE_COUPLING_EL_AC_CUDA)(
+ long* Mesh_pointer_f,
+ int* phase_is_innerf,
+ int* num_coupling_ac_el_facesf,
+ int* SIMULATION_TYPEf) {}
+
+void FC_FUNC_(compute_coupling_ocean_cuda,
+ COMPUTE_COUPLING_OCEAN_CUDA)(long* Mesh_pointer_f,
+ int* SIMULATION_TYPE) {}
+
+
+//
+// src/cuda/compute_forces_crust_mantle_cuda.cu
+//
+
+void FC_FUNC_(compute_forces_crust_mantle_cuda,
+ COMPUTE_FORCES_CRUST_MANTLE_CUDA)(long* Mesh_pointer_f,
+ int* iphase) {}
+
+
+//
+// src/cuda/compute_forces_inner_core_cuda.cu
+//
+
+void FC_FUNC_(compute_forces_inner_core_cuda,
+ COMPUTE_FORCES_INNER_CORE_CUDA)(long* Mesh_pointer_f,
+ int* iphase) {}
+
+
+//
+// src/cuda/compute_forces_outer_core_cuda.cu
+//
+
+void FC_FUNC_(compute_forces_outer_core_cuda,
+ COMPUTE_FORCES_OUTER_CORE_CUDA)(long* Mesh_pointer_f,
+ int* iphase,
+ realw* time_f,
+ realw* b_time_f) {}
+
+
+//
+// src/cuda/compute_kernels_cuda.cu
+//
+
+void FC_FUNC_(compute_kernels_elastic_cuda,
+ COMPUTE_KERNELS_ELASTIC_CUDA)(long* Mesh_pointer,
+ realw* deltat_f) {}
+
+void FC_FUNC_(compute_kernels_strgth_noise_cu,
+ COMPUTE_KERNELS_STRGTH_NOISE_CU)(long* Mesh_pointer,
+ realw* h_noise_surface_movie,
+ realw* deltat) {}
+
+void FC_FUNC_(compute_kernels_acoustic_cuda,
+ COMPUTE_KERNELS_ACOUSTIC_CUDA)(
+ long* Mesh_pointer,
+ realw* deltat_f) {}
+
+void FC_FUNC_(compute_kernels_hess_cuda,
+ COMPUTE_KERNELS_HESS_CUDA)(long* Mesh_pointer,
+ realw* deltat_f,
+ int* ELASTIC_SIMULATION,
+ int* ACOUSTIC_SIMULATION) {}
+
+
+//
+// src/cuda/compute_stacey_acoustic_cuda.cu
+//
+
+void FC_FUNC_(compute_stacey_acoustic_cuda,
+ COMPUTE_STACEY_ACOUSTIC_CUDA)(
+ long* Mesh_pointer_f,
+ int* phase_is_innerf,
+ int* SIMULATION_TYPEf,
+ int* SAVE_FORWARDf,
+ realw* h_b_absorb_potential) {}
+
+
+//
+// src/cuda/compute_stacey_elastic_cuda.cu
+//
+
+void FC_FUNC_(compute_stacey_elastic_cuda,
+ COMPUTE_STACEY_ELASTIC_CUDA)(long* Mesh_pointer_f,
+ int* phase_is_innerf,
+ int* SIMULATION_TYPEf,
+ int* SAVE_FORWARDf,
+ realw* h_b_absorb_field) {}
+
+
+//
+// src/cuda/it_update_displacement_cuda.cu
+//
+
+void FC_FUNC_(it_update_displacement_cuda,
+ IT_UPDATE_DISPLACMENT_CUDA)(long* Mesh_pointer_f,
+ int* size_F,
+ realw* deltat_F,
+ realw* deltatsqover2_F,
+ realw* deltatover2_F,
+ int* SIMULATION_TYPE,
+ realw* b_deltat_F,
+ realw* b_deltatsqover2_F,
+ realw* b_deltatover2_F) {}
+
+void FC_FUNC_(it_update_displacement_ac_cuda,
+ it_update_displacement_ac_cuda)(long* Mesh_pointer_f,
+ int* size_F,
+ realw* deltat_F,
+ realw* deltatsqover2_F,
+ realw* deltatover2_F,
+ int* SIMULATION_TYPE,
+ realw* b_deltat_F,
+ realw* b_deltatsqover2_F,
+ realw* b_deltatover2_F) {}
+
+void FC_FUNC_(kernel_3_a_cuda,
+ KERNEL_3_A_CUDA)(long* Mesh_pointer,
+ realw* deltatover2_F,
+ int* SIMULATION_TYPE_f,
+ realw* b_deltatover2_F,
+ int* OCEANS) {}
+
+void FC_FUNC_(kernel_3_b_cuda,
+ KERNEL_3_B_CUDA)(long* Mesh_pointer,
+ realw* deltatover2_F,
+ int* SIMULATION_TYPE_f,
+ realw* b_deltatover2_F,
+ int* OCEANS) {}
+
+void FC_FUNC_(kernel_3_outer_core_cuda,
+ KERNEL_3_OUTER_CORE_CUDA)(long* Mesh_pointer,
+ realw* deltatover2_F,
+ int* SIMULATION_TYPE_f,
+ realw* b_deltatover2_F) {}
+
+
+//
+// src/cuda/noise_tomography_cuda.cu
+//
+
+void FC_FUNC_(fortranflush,FORTRANFLUSH)(int* rank){}
+
+void FC_FUNC_(fortranprint,FORTRANPRINT)(int* id) {}
+
+void FC_FUNC_(fortranprintf,FORTRANPRINTF)(realw* val) {}
+
+void FC_FUNC_(fortranprintd,FORTRANPRINTD)(double* val) {}
+
+void FC_FUNC_(make_displ_rand,MAKE_DISPL_RAND)(long* Mesh_pointer_f,realw* h_displ) {}
+
+void FC_FUNC_(transfer_surface_to_host,
+ TRANSFER_SURFACE_TO_HOST)(long* Mesh_pointer_f,
+ realw* h_noise_surface_movie) {}
+
+void FC_FUNC_(noise_read_add_surface_movie_cu,
+ NOISE_READ_ADD_SURFACE_MOVIE_CU)(long* Mesh_pointer_f,
+ realw* h_noise_surface_movie,
+ int* NOISE_TOMOGRAPHYf) {}
+
+
+//
+// src/cuda/prepare_mesh_constants_cuda.cu
+//
+
+void FC_FUNC_(prepare_cuda_device,
+ PREPARE_CUDA_DEVICE)(int* myrank_f,int* ncuda_devices) {
+ fprintf(stderr,"ERROR: GPU_MODE enabled without GPU/CUDA Support. To enable GPU support, reconfigure with --with-cuda flag.\n");
+ exit(1);
+}
+
+void FC_FUNC_(prepare_constants_device,
+ PREPARE_CONSTANTS_DEVICE)(long* Mesh_pointer,
+ int* h_NGLLX,
+ realw* h_hprime_xx,realw* h_hprime_yy,realw* h_hprime_zz,
+ realw* h_hprimewgll_xx,realw* h_hprimewgll_yy,realw* h_hprimewgll_zz,
+ realw* h_wgllwgll_xy,realw* h_wgllwgll_xz,realw* h_wgllwgll_yz,
+ int* NSOURCES,int* nsources_local,
+ realw* h_sourcearrays,
+ int* h_islice_selected_source,
+ int* h_ispec_selected_source,
+ int* h_number_receiver_global,
+ int* h_ispec_selected_rec,
+ int* nrec,int* nrec_local,
+ int* NSPEC_CRUST_MANTLE, int* NGLOB_CRUST_MANTLE,
+ int* NSPEC_OUTER_CORE, int* NGLOB_OUTER_CORE,
+ int* NSPEC_INNER_CORE, int* NGLOB_INNER_CORE,
+ int* SIMULATION_TYPE,
+ int* SAVE_FORWARD_f,
+ int* ABSORBING_CONDITIONS_f,
+ int* GRAVITY_f,
+ int* ROTATION_f,
+ int* ATTENUATION_f,
+ int* USE_ATTENUATION_MIMIC_f,
+ int* COMPUTE_AND_STORE_STRAIN_f,
+ int* ANISOTROPIC_3D_MANTLE_f,
+ int* ANISOTROPIC_INNER_CORE_f,
+ int* SAVE_BOUNDARY_MESH_f,
+ int* USE_MESH_COLORING_GPU_f) {}
+
+void FC_FUNC_(prepare_fields_rotation_device,
+ PREPARE_FIELDS_ROTATION_DEVICE)(long* Mesh_pointer_f,
+ realw* two_omega_earth,
+ realw* deltat,
+ realw* A_array_rotation,
+ realw* B_array_rotation,
+ realw* b_two_omega_earth,
+ realw* b_deltat,
+ realw* b_A_array_rotation,
+ realw* b_B_array_rotation,
+ int* NSPEC_OUTER_CORE_ROTATION
+ ) {}
+
+void FC_FUNC_(prepare_fields_gravity_device,
+ PREPARE_FIELDS_gravity_DEVICE)(long* Mesh_pointer_f,
+ realw* d_ln_density_dr_table,
+ realw* minus_rho_g_over_kappa_fluid,
+ realw* minus_gravity_table,
+ realw* minus_deriv_gravity_table,
+ realw* density_table,
+ realw* h_wgll_cube,
+ int* NRAD_GRAVITY
+ ) {}
+
+void FC_FUNC_(prepare_fields_attenuat_device,
+ PREPARE_FIELDS_ATTENUAT_DEVICE)(long* Mesh_pointer_f,
+ realw* R_xx_crust_mantle,
+ realw* R_yy_crust_mantle,
+ realw* R_xy_crust_mantle,
+ realw* R_xz_crust_mantle,
+ realw* R_yz_crust_mantle,
+ realw* factor_common_crust_mantle,
+ realw* one_minus_sum_beta_crust_mantle,
+ realw* R_xx_inner_core,
+ realw* R_yy_inner_core,
+ realw* R_xy_inner_core,
+ realw* R_xz_inner_core,
+ realw* R_yz_inner_core,
+ realw* factor_common_inner_core,
+ realw* one_minus_sum_beta_inner_core,
+ realw* alphaval,realw* betaval,realw* gammaval,
+ realw* b_alphaval,realw* b_betaval,realw* b_gammaval
+ ) {}
+
+void FC_FUNC_(prepare_fields_strain_device,
+ PREPARE_FIELDS_STRAIN_DEVICE)(long* Mesh_pointer_f,
+ realw* epsilondev_xx_crust_mantle,
+ realw* epsilondev_yy_crust_mantle,
+ realw* epsilondev_xy_crust_mantle,
+ realw* epsilondev_xz_crust_mantle,
+ realw* epsilondev_yz_crust_mantle,
+ realw* b_epsilondev_xx_crust_mantle,
+ realw* b_epsilondev_yy_crust_mantle,
+ realw* b_epsilondev_xy_crust_mantle,
+ realw* b_epsilondev_xz_crust_mantle,
+ realw* b_epsilondev_yz_crust_mantle,
+ realw* eps_trace_over_3_crust_mantle,
+ realw* b_eps_trace_over_3_crust_mantle,
+ realw* epsilondev_xx_inner_core,
+ realw* epsilondev_yy_inner_core,
+ realw* epsilondev_xy_inner_core,
+ realw* epsilondev_xz_inner_core,
+ realw* epsilondev_yz_inner_core,
+ realw* b_epsilondev_xx_inner_core,
+ realw* b_epsilondev_yy_inner_core,
+ realw* b_epsilondev_xy_inner_core,
+ realw* b_epsilondev_xz_inner_core,
+ realw* b_epsilondev_yz_inner_core,
+ realw* eps_trace_over_3_inner_core,
+ realw* b_eps_trace_over_3_inner_core
+ ) {}
+
+void FC_FUNC_(prepare_mpi_buffers_device,
+ PREPARE_MPI_BUFFERS_DEVICE)(long* Mesh_pointer_f,
+ int* num_interfaces_crust_mantle,
+ int* max_nibool_interfaces_crust_mantle,
+ int* nibool_interfaces_crust_mantle,
+ int* ibool_interfaces_crust_mantle,
+ int* num_interfaces_inner_core,
+ int* max_nibool_interfaces_inner_core,
+ int* nibool_interfaces_inner_core,
+ int* ibool_interfaces_inner_core,
+ int* num_interfaces_outer_core,
+ int* max_nibool_interfaces_outer_core,
+ int* nibool_interfaces_outer_core,
+ int* ibool_interfaces_outer_core
+ ){}
+
+void FC_FUNC_(prepare_crust_mantle_device,
+ PREPARE_CRUST_MANTLE_DEVICE)(long* Mesh_pointer_f,
+ realw* h_xix, realw* h_xiy, realw* h_xiz,
+ realw* h_etax, realw* h_etay, realw* h_etaz,
+ realw* h_gammax, realw* h_gammay, realw* h_gammaz,
+ realw* h_rho,
+ realw* h_kappav, realw* h_muv,
+ realw* h_kappah, realw* h_muh,
+ realw* h_eta_aniso,
+ realw* h_rmass,
+ int* h_ibool,
+ realw* h_xstore, realw* h_ystore, realw* h_zstore,
+ int* h_ispec_is_tiso,
+ realw *c11store,realw *c12store,realw *c13store,
+ realw *c14store,realw *c15store,realw *c16store,
+ realw *c22store,realw *c23store,realw *c24store,
+ realw *c25store,realw *c26store,realw *c33store,
+ realw *c34store,realw *c35store,realw *c36store,
+ realw *c44store,realw *c45store,realw *c46store,
+ realw *c55store,realw *c56store,realw *c66store,
+ int* num_phase_ispec,
+ int* phase_ispec_inner,
+ int* nspec_outer,
+ int* nspec_inner
+ ) {}
+
+void FC_FUNC_(prepare_outer_core_device,
+ PREPARE_OUTER_CORE_DEVICE)(long* Mesh_pointer_f,
+ realw* h_xix, realw* h_xiy, realw* h_xiz,
+ realw* h_etax, realw* h_etay, realw* h_etaz,
+ realw* h_gammax, realw* h_gammay, realw* h_gammaz,
+ realw* h_rho, realw* h_kappav,
+ realw* h_rmass,
+ int* h_ibool,
+ realw* h_xstore, realw* h_ystore, realw* h_zstore,
+ int* num_phase_ispec,
+ int* phase_ispec_inner,
+ int* nspec_outer,
+ int* nspec_inner
+ ) {}
+
+void FC_FUNC_(prepare_inner_core_device,
+ PREPARE_INNER_CORE_DEVICE)(long* Mesh_pointer_f,
+ realw* h_xix, realw* h_xiy, realw* h_xiz,
+ realw* h_etax, realw* h_etay, realw* h_etaz,
+ realw* h_gammax, realw* h_gammay, realw* h_gammaz,
+ realw* h_rho, realw* h_kappav, realw* h_muv,
+ realw* h_rmass,
+ int* h_ibool,
+ realw* h_xstore, realw* h_ystore, realw* h_zstore,
+ realw *c11store,realw *c12store,realw *c13store,
+ realw *c33store,realw *c44store,
+ int* h_idoubling_inner_core,
+ int* num_phase_ispec,
+ int* phase_ispec_inner,
+ int* nspec_outer,
+ int* nspec_inner
+ //int* iboolleft_xi, int* iboolright_xi,
+ //int* iboolleft_eta, int* iboolright_eta,
+ //int* npoin2D_xi, int* npoin2D_eta
+ ) {}
+
+void FC_FUNC_(prepare_fields_acoustic_device,
+ PREPARE_FIELDS_ACOUSTIC_DEVICE)(long* Mesh_pointer_f,
+ realw* rmass_acoustic,
+ realw* rhostore,
+ realw* kappastore,
+ int* num_phase_ispec_acoustic,
+ int* phase_ispec_inner_acoustic,
+ int* ispec_is_acoustic,
+ int* NOISE_TOMOGRAPHY,
+ int* num_free_surface_faces,
+ int* free_surface_ispec,
+ int* free_surface_ijk,
+ int* ABSORBING_CONDITIONS,
+ int* b_reclen_potential,
+ realw* b_absorb_potential,
+ int* ELASTIC_SIMULATION,
+ int* num_coupling_ac_el_faces,
+ int* coupling_ac_el_ispec,
+ int* coupling_ac_el_ijk,
+ realw* coupling_ac_el_normal,
+ realw* coupling_ac_el_jacobian2Dw,
+ int* num_colors_outer_acoustic,
+ int* num_colors_inner_acoustic,
+ int* num_elem_colors_acoustic) {}
+
+void FC_FUNC_(prepare_fields_acoustic_adj_dev,
+ PREPARE_FIELDS_ACOUSTIC_ADJ_DEV)(long* Mesh_pointer_f,
+ int* SIMULATION_TYPE,
+ int* APPROXIMATE_HESS_KL) {}
+
+// stubs: empty placeholders for builds without CUDA support (elastic preparation)
+void FC_FUNC_(prepare_fields_elastic_device,
+ PREPARE_FIELDS_ELASTIC_DEVICE)(long* Mesh_pointer_f,
+ int* size,
+ realw* rmass,
+ realw* rho_vp,
+ realw* rho_vs,
+ int* num_phase_ispec_elastic,
+ int* phase_ispec_inner_elastic,
+ int* ispec_is_elastic,
+ int* ABSORBING_CONDITIONS,
+ realw* h_b_absorb_field,
+ int* h_b_reclen_field,
+ int* SIMULATION_TYPE,int* SAVE_FORWARD,
+ int* COMPUTE_AND_STORE_STRAIN,
+ realw* epsilondev_xx,realw* epsilondev_yy,realw* epsilondev_xy,
+ realw* epsilondev_xz,realw* epsilondev_yz,
+ int* ATTENUATION,
+ int* R_size,
+ realw* R_xx,realw* R_yy,realw* R_xy,realw* R_xz,realw* R_yz,
+ realw* one_minus_sum_beta,realw* factor_common,
+ realw* alphaval,realw* betaval,realw* gammaval,
+ int* OCEANS,
+ realw* rmass_ocean_load,
+ int* NOISE_TOMOGRAPHY,
+ realw* free_surface_normal,
+ int* free_surface_ispec,
+ int* free_surface_ijk,
+ int* num_free_surface_faces,
+ int* ACOUSTIC_SIMULATION,
+ int* num_colors_outer_elastic,
+ int* num_colors_inner_elastic,
+ int* num_elem_colors_elastic,
+ int* ANISOTROPY,
+ realw *c11store,
+ realw *c12store,
+ realw *c13store,
+ realw *c14store,
+ realw *c15store,
+ realw *c16store,
+ realw *c22store,
+ realw *c23store,
+ realw *c24store,
+ realw *c25store,
+ realw *c26store,
+ realw *c33store,
+ realw *c34store,
+ realw *c35store,
+ realw *c36store,
+ realw *c44store,
+ realw *c45store,
+ realw *c46store,
+ realw *c55store,
+ realw *c56store,
+ realw *c66store){}
+
+void FC_FUNC_(prepare_fields_elastic_adj_dev,
+ PREPARE_FIELDS_ELASTIC_ADJ_DEV)(long* Mesh_pointer_f,
+ int* size,
+ int* SIMULATION_TYPE,
+ int* COMPUTE_AND_STORE_STRAIN,
+ realw* epsilon_trace_over_3,
+ realw* b_epsilondev_xx,realw* b_epsilondev_yy,realw* b_epsilondev_xy,
+ realw* b_epsilondev_xz,realw* b_epsilondev_yz,
+ realw* b_epsilon_trace_over_3,
+ int* ATTENUATION,
+ int* R_size,
+ realw* b_R_xx,realw* b_R_yy,realw* b_R_xy,realw* b_R_xz,realw* b_R_yz,
+ realw* b_alphaval,realw* b_betaval,realw* b_gammaval,
+ int* APPROXIMATE_HESS_KL){}
+
+// stubs: empty placeholders for builds without CUDA support
+// (adjoint/kernel constants, noise fields, device cleanup)
+void FC_FUNC_(prepare_sim2_or_3_const_device,
+ PREPARE_SIM2_OR_3_CONST_DEVICE)(
+ long* Mesh_pointer_f,
+ int* islice_selected_rec,
+ int* islice_selected_rec_size,
+ int* nadj_rec_local,
+ int* nrec,
+ int* myrank) {}
+
+void FC_FUNC_(prepare_fields_noise_device,
+ PREPARE_FIELDS_NOISE_DEVICE)(long* Mesh_pointer_f,
+ int* NSPEC_AB, int* NGLOB_AB,
+ int* free_surface_ispec,
+ int* free_surface_ijk,
+ int* num_free_surface_faces,
+ int* SIMULATION_TYPE,
+ int* NOISE_TOMOGRAPHY,
+ int* NSTEP,
+ realw* noise_sourcearray,
+ realw* normal_x_noise,
+ realw* normal_y_noise,
+ realw* normal_z_noise,
+ realw* mask_noise,
+ realw* free_surface_jacobian2Dw) {}
+
+void FC_FUNC_(prepare_cleanup_device,
+ PREPARE_CLEANUP_DEVICE)(long* Mesh_pointer_f) {}
+
+
+//
+// src/cuda/transfer_fields_cuda.cu
+//
+
+// stubs: wavefield transfer routines for builds without CUDA support.
+// fix: the upper-case FC_FUNC_ names of the backward "to_device" variants were
+// TRANSFER_FIELDS_B_*_TO_DEVICE, which does not match the lower-case routine
+// names (transfer_b_fields_*_to_device) and breaks linking with compilers that
+// use upper-case Fortran name mangling; renamed to TRANSFER_B_FIELDS_*_TO_DEVICE
+// for consistency with the *_FROM_DEVICE counterparts below
+void FC_FUNC_(transfer_fields_cm_to_device,
+ TRANSFER_FIELDS_CM_TO_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {}
+
+void FC_FUNC_(transfer_fields_ic_to_device,
+ TRANSFER_FIELDS_IC_TO_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {}
+
+void FC_FUNC_(transfer_fields_oc_to_device,
+ TRANSFER_FIELDS_OC_TO_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {}
+
+void FC_FUNC_(transfer_b_fields_cm_to_device,
+ TRANSFER_B_FIELDS_CM_TO_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,
+ long* Mesh_pointer_f) {}
+
+void FC_FUNC_(transfer_b_fields_ic_to_device,
+ TRANSFER_B_FIELDS_IC_TO_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,
+ long* Mesh_pointer_f) {}
+
+void FC_FUNC_(transfer_b_fields_oc_to_device,
+ TRANSFER_B_FIELDS_OC_TO_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,
+ long* Mesh_pointer_f) {}
+
+void FC_FUNC_(transfer_fields_cm_from_device,
+ TRANSFER_FIELDS_CM_FROM_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {}
+
+void FC_FUNC_(transfer_fields_ic_from_device,
+ TRANSFER_FIELDS_IC_FROM_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {}
+
+void FC_FUNC_(transfer_fields_oc_from_device,
+ TRANSFER_FIELDS_OC_FROM_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {}
+
+void FC_FUNC_(transfer_b_fields_cm_from_device,
+ TRANSFER_B_FIELDS_CM_FROM_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,
+ long* Mesh_pointer_f) {}
+
+void FC_FUNC_(transfer_b_fields_ic_from_device,
+ TRANSFER_B_FIELDS_IC_FROM_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,
+ long* Mesh_pointer_f) {}
+
+void FC_FUNC_(transfer_b_fields_oc_from_device,
+ TRANSFER_B_FIELDS_OC_FROM_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,
+ long* Mesh_pointer_f) {}
+
+// stubs for builds without CUDA support.
+// fix: upper-case names were misspelled TRNASFER_* (TRNASFER_ACCEL_CM_TO_DEVICE,
+// TRNASFER_B_ACCEL_CM_FROM_DEVICE); the typo breaks the generated symbol under
+// upper-case Fortran name mangling, so they are corrected to TRANSFER_*
+void FC_FUNC_(transfer_accel_cm_to_device,
+ TRANSFER_ACCEL_CM_TO_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {}
+
+void FC_FUNC_(transfer_accel_cm_from_device,
+ TRANSFER_ACCEL_CM_FROM_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {}
+
+void FC_FUNC_(transfer_b_accel_cm_from_device,
+ TRANSFER_B_ACCEL_CM_FROM_DEVICE)(int* size, realw* b_accel,long* Mesh_pointer_f) {}
+
+void FC_FUNC_(transfer_sigma_from_device,
+ TRANSFER_SIGMA_FROM_DEVICE)(int* size, realw* sigma_kl,long* Mesh_pointer_f) {}
+
+void FC_FUNC_(transfer_b_displ_from_device,
+ TRANSFER_B_DISPL_FROM_DEVICE)(int* size, realw* displ,long* Mesh_pointer_f) {}
+
+void FC_FUNC_(transfer_displ_from_device,
+ TRANSFER_DISPL_FROM_DEVICE)(int* size, realw* displ,long* Mesh_pointer_f) {}
+
+// stubs: kernel/attenuation transfer routines for builds without CUDA support
+void FC_FUNC_(transfer_compute_kernel_answers_from_device,
+ TRANSFER_COMPUTE_KERNEL_ANSWERS_FROM_DEVICE)(long* Mesh_pointer,
+ realw* rho_kl,int* size_rho,
+ realw* mu_kl, int* size_mu,
+ realw* kappa_kl, int* size_kappa) {}
+
+void FC_FUNC_(transfer_compute_kernel_fields_from_device,
+ TRANSFER_COMPUTE_KERNEL_FIELDS_FROM_DEVICE)(long* Mesh_pointer,
+ realw* accel, int* size_accel,
+ realw* b_displ, int* size_b_displ,
+ realw* epsilondev_xx,
+ realw* epsilondev_yy,
+ realw* epsilondev_xy,
+ realw* epsilondev_xz,
+ realw* epsilondev_yz,
+ int* size_epsilondev,
+ realw* b_epsilondev_xx,
+ realw* b_epsilondev_yy,
+ realw* b_epsilondev_xy,
+ realw* b_epsilondev_xz,
+ realw* b_epsilondev_yz,
+ int* size_b_epsilondev,
+ realw* rho_kl,int* size_rho,
+ realw* mu_kl, int* size_mu,
+ realw* kappa_kl, int* size_kappa,
+ realw* epsilon_trace_over_3,
+ realw* b_epsilon_trace_over_3,
+ int* size_epsilon_trace_over_3) {}
+
+void FC_FUNC_(transfer_b_fields_att_to_device,
+ TRANSFER_B_FIELDS_ATT_TO_DEVICE)(long* Mesh_pointer,
+ realw* b_R_xx,realw* b_R_yy,realw* b_R_xy,realw* b_R_xz,realw* b_R_yz,
+ int* size_R,
+ realw* b_epsilondev_xx,
+ realw* b_epsilondev_yy,
+ realw* b_epsilondev_xy,
+ realw* b_epsilondev_xz,
+ realw* b_epsilondev_yz,
+ int* size_epsilondev) {}
+
+void FC_FUNC_(transfer_fields_att_from_device,
+ TRANSFER_FIELDS_ATT_FROM_DEVICE)(long* Mesh_pointer,
+ realw* R_xx,realw* R_yy,realw* R_xy,realw* R_xz,realw* R_yz,
+ int* size_R,
+ realw* epsilondev_xx,
+ realw* epsilondev_yy,
+ realw* epsilondev_xy,
+ realw* epsilondev_xz,
+ realw* epsilondev_yz,
+ int* size_epsilondev) {}
+
+void FC_FUNC_(transfer_kernels_el_to_host,
+ TRANSFER_KERNELS_EL_TO_HOST)(long* Mesh_pointer,
+ realw* h_rho_kl,
+ realw* h_mu_kl,
+ realw* h_kappa_kl,
+ int* NSPEC_AB) {}
+
+void FC_FUNC_(transfer_kernels_noise_to_host,
+ TRANSFER_KERNELS_NOISE_TO_HOST)(long* Mesh_pointer,
+ realw* h_Sigma_kl,
+ int* NSPEC_AB) {}
+
+// stubs: acoustic transfer routines for builds without CUDA support
+void FC_FUNC_(transfer_fields_ac_to_device,
+ TRANSFER_FIELDS_AC_TO_DEVICE)(
+ int* size,
+ realw* potential_acoustic,
+ realw* potential_dot_acoustic,
+ realw* potential_dot_dot_acoustic,
+ long* Mesh_pointer_f) {}
+
+void FC_FUNC_(transfer_b_fields_ac_to_device,
+ TRANSFER_B_FIELDS_AC_TO_DEVICE)(
+ int* size,
+ realw* b_potential_acoustic,
+ realw* b_potential_dot_acoustic,
+ realw* b_potential_dot_dot_acoustic,
+ long* Mesh_pointer_f) {}
+
+void FC_FUNC_(transfer_fields_ac_from_device,TRANSFER_FIELDS_AC_FROM_DEVICE)(
+ int* size,
+ realw* potential_acoustic,
+ realw* potential_dot_acoustic,
+ realw* potential_dot_dot_acoustic,
+ long* Mesh_pointer_f) {}
+
+void FC_FUNC_(transfer_b_fields_ac_from_device,
+ TRANSFER_B_FIELDS_AC_FROM_DEVICE)(
+ int* size,
+ realw* b_potential_acoustic,
+ realw* b_potential_dot_acoustic,
+ realw* b_potential_dot_dot_acoustic,
+ long* Mesh_pointer_f) {}
+
+// fix: upper-case names were misspelled TRNASFER_*; the typo breaks the
+// generated symbol under upper-case Fortran name mangling
+void FC_FUNC_(transfer_dot_dot_from_device,
+ TRANSFER_DOT_DOT_FROM_DEVICE)(int* size, realw* potential_dot_dot_acoustic,long* Mesh_pointer_f) {}
+
+void FC_FUNC_(transfer_b_dot_dot_from_device,
+ TRANSFER_B_DOT_DOT_FROM_DEVICE)(int* size, realw* b_potential_dot_dot_acoustic,long* Mesh_pointer_f) {}
+
+void FC_FUNC_(transfer_kernels_ac_to_host,
+ TRANSFER_KERNELS_AC_TO_HOST)(long* Mesh_pointer,
+ realw* h_rho_ac_kl,
+ realw* h_kappa_ac_kl,
+ int* NSPEC_AB) {}
+
+void FC_FUNC_(transfer_kernels_hess_el_tohost,
+ TRANSFER_KERNELS_HESS_EL_TOHOST)(long* Mesh_pointer,
+ realw* h_hess_kl,
+ int* NSPEC_AB) {}
+
+void FC_FUNC_(transfer_kernels_hess_ac_tohost,
+ TRANSFER_KERNELS_HESS_AC_TOHOST)(long* Mesh_pointer,
+ realw* h_hess_ac_kl,
+ int* NSPEC_AB) {}
+
+
+//
+// src/cuda/write_seismograms_cuda.cu
+//
+
+// stubs: seismogram/station transfer routines for builds without CUDA support
+void FC_FUNC_(transfer_station_el_from_device,
+ TRANSFER_STATION_EL_FROM_DEVICE)(realw* displ,realw* veloc,realw* accel,
+ realw* b_displ, realw* b_veloc, realw* b_accel,
+ long* Mesh_pointer_f,int* number_receiver_global,
+ int* ispec_selected_rec,int* ispec_selected_source,
+ int* ibool,int* SIMULATION_TYPEf) {}
+
+void FC_FUNC_(transfer_station_ac_from_device,
+ TRANSFER_STATION_AC_FROM_DEVICE)(
+ realw* potential_acoustic,
+ realw* potential_dot_acoustic,
+ realw* potential_dot_dot_acoustic,
+ realw* b_potential_acoustic,
+ realw* b_potential_dot_acoustic,
+ realw* b_potential_dot_dot_acoustic,
+ long* Mesh_pointer_f,
+ int* number_receiver_global,
+ int* ispec_selected_rec,
+ int* ispec_selected_source,
+ int* ibool,
+ int* SIMULATION_TYPEf) {}
+
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/transfer_fields_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/transfer_fields_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/transfer_fields_cuda.cu 2012-02-14 15:11:07 UTC (rev 19622)
@@ -0,0 +1,728 @@
+/*
+ !=====================================================================
+ !
+ ! S p e c f e m 3 D V e r s i o n 2 . 0
+ ! ---------------------------------------
+ !
+ ! Main authors: Dimitri Komatitsch and Jeroen Tromp
+ ! Princeton University, USA and University of Pau / CNRS / INRIA
+ ! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+ ! April 2011
+ !
+ ! This program is free software; you can redistribute it and/or modify
+ ! it under the terms of the GNU General Public License as published by
+ ! the Free Software Foundation; either version 2 of the License, or
+ ! (at your option) any later version.
+ !
+ ! This program is distributed in the hope that it will be useful,
+ ! but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ ! GNU General Public License for more details.
+ !
+ ! You should have received a copy of the GNU General Public License along
+ ! with this program; if not, write to the Free Software Foundation, Inc.,
+ ! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ !
+ !=====================================================================
+ */
+
+#include <stdio.h>
+#include <cuda.h>
+#include <cublas.h>
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+#include "prepare_constants_cuda.h"
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Transfer functions
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// transfer memory from CPU host to GPU device
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// crust_mantle
+extern "C"
+void FC_FUNC_(transfer_fields_cm_to_device,
+ TRANSFER_FIELDS_CM_TO_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {
+
+TRACE("transfer_fields_cm_to_device_");
+
+  // fortran integer container holds the address of the mesh struct
+  Mesh* mesh = (Mesh*)(*Mesh_pointer_f);
+  size_t bytes = (*size) * sizeof(realw);
+
+  // copies the crust_mantle wavefields (displ/veloc/accel) from host to device
+  print_CUDA_error_if_any(cudaMemcpy(mesh->d_displ_crust_mantle,displ,bytes,cudaMemcpyHostToDevice),40003);
+  print_CUDA_error_if_any(cudaMemcpy(mesh->d_veloc_crust_mantle,veloc,bytes,cudaMemcpyHostToDevice),40004);
+  print_CUDA_error_if_any(cudaMemcpy(mesh->d_accel_crust_mantle,accel,bytes,cudaMemcpyHostToDevice),40005);
+
+}
+
+// inner_core
+extern "C"
+void FC_FUNC_(transfer_fields_ic_to_device,
+ TRANSFER_FIELDS_IC_TO_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {
+
+  TRACE("transfer_fields_ic_to_device_");
+
+  // fortran integer container holds the address of the mesh struct
+  Mesh* mesh = (Mesh*)(*Mesh_pointer_f);
+  size_t bytes = (*size) * sizeof(realw);
+
+  // copies the inner_core wavefields (displ/veloc/accel) from host to device
+  print_CUDA_error_if_any(cudaMemcpy(mesh->d_displ_inner_core,displ,bytes,cudaMemcpyHostToDevice),40003);
+  print_CUDA_error_if_any(cudaMemcpy(mesh->d_veloc_inner_core,veloc,bytes,cudaMemcpyHostToDevice),40004);
+  print_CUDA_error_if_any(cudaMemcpy(mesh->d_accel_inner_core,accel,bytes,cudaMemcpyHostToDevice),40005);
+
+}
+
+// outer_core
+extern "C"
+void FC_FUNC_(transfer_fields_oc_to_device,
+ TRANSFER_FIELDS_OC_TO_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {
+
+  TRACE("transfer_fields_oc_to_device_");
+
+  // fortran integer container holds the address of the mesh struct
+  Mesh* mesh = (Mesh*)(*Mesh_pointer_f);
+  size_t bytes = (*size) * sizeof(realw);
+
+  // copies the outer_core wavefields (displ/veloc/accel) from host to device
+  print_CUDA_error_if_any(cudaMemcpy(mesh->d_displ_outer_core,displ,bytes,cudaMemcpyHostToDevice),40003);
+  print_CUDA_error_if_any(cudaMemcpy(mesh->d_veloc_outer_core,veloc,bytes,cudaMemcpyHostToDevice),40004);
+  print_CUDA_error_if_any(cudaMemcpy(mesh->d_accel_outer_core,accel,bytes,cudaMemcpyHostToDevice),40005);
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// backward/reconstructed fields
+
+// crust_mantle
+extern "C"
+void FC_FUNC_(transfer_b_fields_cm_to_device,
+ TRANSFER_B_FIELDS_CM_TO_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,
+ long* Mesh_pointer_f) {
+
+  TRACE("transfer_b_fields_cm_to_device");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+  // copies backward/reconstructed crust_mantle wavefields from host to device.
+  // fixes: upper-case name was TRANSFER_FIELDS_B_CM_TO_DEVICE (mismatch with the
+  // lower-case routine name breaks upper-case Fortran mangling); copies are now
+  // checked instead of silently ignoring CUDA errors
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_displ_crust_mantle,b_displ,sizeof(realw)*(*size),cudaMemcpyHostToDevice),41003);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_veloc_crust_mantle,b_veloc,sizeof(realw)*(*size),cudaMemcpyHostToDevice),41004);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_accel_crust_mantle,b_accel,sizeof(realw)*(*size),cudaMemcpyHostToDevice),41005);
+
+}
+
+// inner_core
+extern "C"
+void FC_FUNC_(transfer_b_fields_ic_to_device,
+ TRANSFER_B_FIELDS_IC_TO_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,
+ long* Mesh_pointer_f) {
+
+  TRACE("transfer_b_fields_ic_to_device");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+  // copies backward/reconstructed inner_core wavefields from host to device.
+  // fixes: upper-case name was TRANSFER_FIELDS_B_IC_TO_DEVICE (mismatch with the
+  // lower-case routine name breaks upper-case Fortran mangling); copies are now checked
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_displ_inner_core,b_displ,sizeof(realw)*(*size),cudaMemcpyHostToDevice),41006);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_veloc_inner_core,b_veloc,sizeof(realw)*(*size),cudaMemcpyHostToDevice),41007);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_accel_inner_core,b_accel,sizeof(realw)*(*size),cudaMemcpyHostToDevice),41008);
+
+}
+
+// outer_core
+extern "C"
+void FC_FUNC_(transfer_b_fields_oc_to_device,
+ TRANSFER_B_FIELDS_OC_TO_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,
+ long* Mesh_pointer_f) {
+
+  TRACE("transfer_b_fields_oc_to_device");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+  // copies backward/reconstructed outer_core wavefields from host to device.
+  // fixes: upper-case name was TRANSFER_FIELDS_B_OC_TO_DEVICE (mismatch with the
+  // lower-case routine name breaks upper-case Fortran mangling); copies are now checked
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_displ_outer_core,b_displ,sizeof(realw)*(*size),cudaMemcpyHostToDevice),41009);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_veloc_outer_core,b_veloc,sizeof(realw)*(*size),cudaMemcpyHostToDevice),41010);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_accel_outer_core,b_accel,sizeof(realw)*(*size),cudaMemcpyHostToDevice),41011);
+
+}
+
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// transfer memory from GPU device to CPU host
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// crust_mantle
+extern "C"
+void FC_FUNC_(transfer_fields_cm_from_device,
+ TRANSFER_FIELDS_CM_FROM_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {
+
+  TRACE("transfer_fields_cm_from_device_");
+
+  // fortran integer container holds the address of the mesh struct
+  Mesh* mesh = (Mesh*)(*Mesh_pointer_f);
+  size_t bytes = (*size) * sizeof(realw);
+
+  // copies the crust_mantle wavefields back from the device into the host arrays
+  print_CUDA_error_if_any(cudaMemcpy(displ,mesh->d_displ_crust_mantle,bytes,cudaMemcpyDeviceToHost),40006);
+  print_CUDA_error_if_any(cudaMemcpy(veloc,mesh->d_veloc_crust_mantle,bytes,cudaMemcpyDeviceToHost),40007);
+  print_CUDA_error_if_any(cudaMemcpy(accel,mesh->d_accel_crust_mantle,bytes,cudaMemcpyDeviceToHost),40008);
+
+}
+
+// inner_core
+extern "C"
+void FC_FUNC_(transfer_fields_ic_from_device,
+ TRANSFER_FIELDS_IC_FROM_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {
+
+  TRACE("transfer_fields_ic_from_device_");
+
+  // fortran integer container holds the address of the mesh struct
+  Mesh* mesh = (Mesh*)(*Mesh_pointer_f);
+  size_t bytes = (*size) * sizeof(realw);
+
+  // copies the inner_core wavefields back from the device into the host arrays
+  print_CUDA_error_if_any(cudaMemcpy(displ,mesh->d_displ_inner_core,bytes,cudaMemcpyDeviceToHost),40006);
+  print_CUDA_error_if_any(cudaMemcpy(veloc,mesh->d_veloc_inner_core,bytes,cudaMemcpyDeviceToHost),40007);
+  print_CUDA_error_if_any(cudaMemcpy(accel,mesh->d_accel_inner_core,bytes,cudaMemcpyDeviceToHost),40008);
+
+}
+
+// outer_core
+extern "C"
+void FC_FUNC_(transfer_fields_oc_from_device,
+ TRANSFER_FIELDS_OC_FROM_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {
+
+  TRACE("transfer_fields_oc_from_device_");
+
+  // fortran integer container holds the address of the mesh struct
+  Mesh* mesh = (Mesh*)(*Mesh_pointer_f);
+  size_t bytes = (*size) * sizeof(realw);
+
+  // copies the outer_core wavefields back from the device into the host arrays
+  print_CUDA_error_if_any(cudaMemcpy(displ,mesh->d_displ_outer_core,bytes,cudaMemcpyDeviceToHost),40006);
+  print_CUDA_error_if_any(cudaMemcpy(veloc,mesh->d_veloc_outer_core,bytes,cudaMemcpyDeviceToHost),40007);
+  print_CUDA_error_if_any(cudaMemcpy(accel,mesh->d_accel_outer_core,bytes,cudaMemcpyDeviceToHost),40008);
+
+}
+
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// crust_mantle
+extern "C"
+void FC_FUNC_(transfer_b_fields_cm_from_device,
+ TRANSFER_B_FIELDS_CM_FROM_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,
+ long* Mesh_pointer_f) {
+
+  TRACE("transfer_b_fields_cm_from_device");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+  // copies backward/reconstructed crust_mantle wavefields back to the host.
+  // fix: copies are now checked with print_CUDA_error_if_any, consistent with
+  // the inner_core/outer_core versions of this routine
+  print_CUDA_error_if_any(cudaMemcpy(b_displ,mp->d_b_displ_crust_mantle,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40006);
+  print_CUDA_error_if_any(cudaMemcpy(b_veloc,mp->d_b_veloc_crust_mantle,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40007);
+  print_CUDA_error_if_any(cudaMemcpy(b_accel,mp->d_b_accel_crust_mantle,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40008);
+
+}
+
+// inner_core
+extern "C"
+void FC_FUNC_(transfer_b_fields_ic_from_device,
+ TRANSFER_B_FIELDS_IC_FROM_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,
+ long* Mesh_pointer_f) {
+
+  TRACE("transfer_b_fields_ic_from_device");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+  // copies backward/reconstructed inner_core wavefields back to the host
+  // (fix: trace label previously read "transfer_fields_b_ic_from_device_",
+  // which does not match this routine's name and misleads debug output)
+  print_CUDA_error_if_any(cudaMemcpy(b_displ,mp->d_b_displ_inner_core,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40006);
+  print_CUDA_error_if_any(cudaMemcpy(b_veloc,mp->d_b_veloc_inner_core,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40007);
+  print_CUDA_error_if_any(cudaMemcpy(b_accel,mp->d_b_accel_inner_core,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40008);
+
+}
+
+// outer_core
+extern "C"
+void FC_FUNC_(transfer_b_fields_oc_from_device,
+ TRANSFER_B_FIELDS_OC_FROM_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,
+ long* Mesh_pointer_f) {
+
+  TRACE("transfer_b_fields_oc_from_device_");
+
+  // fortran integer container holds the address of the mesh struct
+  Mesh* mesh = (Mesh*)(*Mesh_pointer_f);
+  size_t bytes = (*size) * sizeof(realw);
+
+  // backward/reconstructed outer_core wavefields: device -> host
+  print_CUDA_error_if_any(cudaMemcpy(b_displ,mesh->d_b_displ_outer_core,bytes,cudaMemcpyDeviceToHost),40006);
+  print_CUDA_error_if_any(cudaMemcpy(b_veloc,mesh->d_b_veloc_outer_core,bytes,cudaMemcpyDeviceToHost),40007);
+  print_CUDA_error_if_any(cudaMemcpy(b_accel,mesh->d_b_accel_outer_core,bytes,cudaMemcpyDeviceToHost),40008);
+
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_accel_cm_to_device,
+ TRANSFER_ACCEL_CM_TO_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {
+
+TRACE("transfer_accel_cm_to_device");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+  // copies the crust_mantle acceleration from host to device.
+  // fix: upper-case name was misspelled TRNASFER_ACCEL_CM_TO_DEVICE, which
+  // breaks the generated symbol under upper-case Fortran name mangling
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_accel_crust_mantle,accel,sizeof(realw)*(*size),cudaMemcpyHostToDevice),40016);
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_accel_cm_from_device,
+ TRANSFER_ACCEL_CM_FROM_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {
+
+TRACE("transfer_accel_cm_from_device");
+
+  // fortran integer container holds the address of the mesh struct
+  Mesh* mesh = (Mesh*)(*Mesh_pointer_f);
+
+  // crust_mantle acceleration: device -> host
+  print_CUDA_error_if_any(cudaMemcpy(accel,mesh->d_accel_crust_mantle,(*size) * sizeof(realw),cudaMemcpyDeviceToHost),40026);
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_b_accel_cm_from_device,
+ TRANSFER_B_ACCEL_CM_FROM_DEVICE)(int* size, realw* b_accel,long* Mesh_pointer_f) {
+
+TRACE("transfer_b_accel_cm_from_device");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+  // copies the backward/reconstructed crust_mantle acceleration to the host.
+  // fix: upper-case name was misspelled TRNASFER_B_ACCEL_CM_FROM_DEVICE, which
+  // breaks the generated symbol under upper-case Fortran name mangling
+  print_CUDA_error_if_any(cudaMemcpy(b_accel,mp->d_b_accel_crust_mantle,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40036);
+
+}
+
+
+//daniel: TODO old code routines...
+
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_sigma_from_device,
+ TRANSFER_SIGMA_FROM_DEVICE)(int* size, realw* sigma_kl,long* Mesh_pointer_f) {
+
+TRACE("transfer_sigma_from_device");
+
+  // fortran integer container holds the address of the mesh struct
+  Mesh* mesh = (Mesh*)(*Mesh_pointer_f);
+
+  // Sigma kernel values: device -> host
+  print_CUDA_error_if_any(cudaMemcpy(sigma_kl,mesh->d_Sigma_kl,(*size) * sizeof(realw),cudaMemcpyDeviceToHost),40046);
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_b_displ_from_device,
+ TRANSFER_B_DISPL_FROM_DEVICE)(int* size, realw* displ,long* Mesh_pointer_f) {
+
+TRACE("transfer_b_displ_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ // NOTE(review): this is the backward-field ("b_") variant but it copies from
+ // mp->d_displ, same as transfer_displ_from_device below - presumably
+ // mp->d_b_displ was intended; confirm against the Mesh struct definition
+ // (routine sits under the "old code routines" marker above)
+ print_CUDA_error_if_any(cudaMemcpy(displ,mp->d_displ,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40056);
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_displ_from_device,
+ TRANSFER_DISPL_FROM_DEVICE)(int* size, realw* displ,long* Mesh_pointer_f) {
+
+TRACE("transfer_displ_from_device");
+
+  // fortran integer container holds the address of the mesh struct
+  Mesh* mesh = (Mesh*)(*Mesh_pointer_f);
+
+  // displacement wavefield: device -> host
+  print_CUDA_error_if_any(cudaMemcpy(displ,mesh->d_displ,(*size) * sizeof(realw),cudaMemcpyDeviceToHost),40066);
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+/*
+extern "C"
+void FC_FUNC_(transfer_compute_kernel_answers_from_device,
+ TRANSFER_COMPUTE_KERNEL_ANSWERS_FROM_DEVICE)(long* Mesh_pointer,
+ realw* rho_kl,int* size_rho,
+ realw* mu_kl, int* size_mu,
+ realw* kappa_kl, int* size_kappa) {
+TRACE("transfer_compute_kernel_answers_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+ cudaMemcpy(rho_kl,mp->d_rho_kl,*size_rho*sizeof(realw),cudaMemcpyDeviceToHost);
+ cudaMemcpy(mu_kl,mp->d_mu_kl,*size_mu*sizeof(realw),cudaMemcpyDeviceToHost);
+ cudaMemcpy(kappa_kl,mp->d_kappa_kl,*size_kappa*sizeof(realw),cudaMemcpyDeviceToHost);
+
+}
+*/
+
+/* ----------------------------------------------------------------------------------------------- */
+/*
+extern "C"
+void FC_FUNC_(transfer_compute_kernel_fields_from_device,
+ TRANSFER_COMPUTE_KERNEL_FIELDS_FROM_DEVICE)(long* Mesh_pointer,
+ realw* accel, int* size_accel,
+ realw* b_displ, int* size_b_displ,
+ realw* epsilondev_xx,
+ realw* epsilondev_yy,
+ realw* epsilondev_xy,
+ realw* epsilondev_xz,
+ realw* epsilondev_yz,
+ int* size_epsilondev,
+ realw* b_epsilondev_xx,
+ realw* b_epsilondev_yy,
+ realw* b_epsilondev_xy,
+ realw* b_epsilondev_xz,
+ realw* b_epsilondev_yz,
+ int* size_b_epsilondev,
+ realw* rho_kl,int* size_rho,
+ realw* mu_kl, int* size_mu,
+ realw* kappa_kl, int* size_kappa,
+ realw* epsilon_trace_over_3,
+ realw* b_epsilon_trace_over_3,
+ int* size_epsilon_trace_over_3) {
+TRACE("transfer_compute_kernel_fields_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+ cudaMemcpy(accel,mp->d_accel,*size_accel*sizeof(realw),cudaMemcpyDeviceToHost);
+ cudaMemcpy(b_displ,mp->d_b_displ,*size_b_displ*sizeof(realw),cudaMemcpyDeviceToHost);
+ cudaMemcpy(epsilondev_xx,mp->d_epsilondev_xx,*size_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost);
+ cudaMemcpy(epsilondev_yy,mp->d_epsilondev_yy,*size_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost);
+ cudaMemcpy(epsilondev_xy,mp->d_epsilondev_xy,*size_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost);
+ cudaMemcpy(epsilondev_xz,mp->d_epsilondev_xz,*size_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost);
+ cudaMemcpy(epsilondev_yz,mp->d_epsilondev_yz,*size_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost);
+ cudaMemcpy(b_epsilondev_xx,mp->d_b_epsilondev_xx,*size_b_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost);
+ cudaMemcpy(b_epsilondev_yy,mp->d_b_epsilondev_yy,*size_b_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost);
+ cudaMemcpy(b_epsilondev_xy,mp->d_b_epsilondev_xy,*size_b_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost);
+ cudaMemcpy(b_epsilondev_xz,mp->d_b_epsilondev_xz,*size_b_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost);
+ cudaMemcpy(b_epsilondev_yz,mp->d_b_epsilondev_yz,*size_b_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost);
+ cudaMemcpy(rho_kl,mp->d_rho_kl,*size_rho*sizeof(realw),cudaMemcpyDeviceToHost);
+ cudaMemcpy(mu_kl,mp->d_mu_kl,*size_mu*sizeof(realw),cudaMemcpyDeviceToHost);
+ cudaMemcpy(kappa_kl,mp->d_kappa_kl,*size_kappa*sizeof(realw),cudaMemcpyDeviceToHost);
+ cudaMemcpy(epsilon_trace_over_3,mp->d_epsilon_trace_over_3,*size_epsilon_trace_over_3*sizeof(realw),
+ cudaMemcpyDeviceToHost);
+ cudaMemcpy(b_epsilon_trace_over_3,mp->d_b_epsilon_trace_over_3,*size_epsilon_trace_over_3*sizeof(realw),
+ cudaMemcpyDeviceToHost);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("after transfer_compute_kernel_fields_from_device");
+#endif
+}
+*/
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// attenuation fields
+
+extern "C"
+void FC_FUNC_(transfer_b_fields_att_to_device,
+ TRANSFER_B_FIELDS_ATT_TO_DEVICE)(long* Mesh_pointer,
+ realw* b_R_xx,realw* b_R_yy,realw* b_R_xy,realw* b_R_xz,realw* b_R_yz,
+ int* size_R,
+ realw* b_epsilondev_xx,
+ realw* b_epsilondev_yy,
+ realw* b_epsilondev_xy,
+ realw* b_epsilondev_xz,
+ realw* b_epsilondev_yz,
+ int* size_epsilondev) {
+  TRACE("transfer_b_fields_att_to_device");
+  //get mesh pointer out of fortran integer container
+  Mesh* mp = (Mesh*)(*Mesh_pointer);
+
+  // copies backward/reconstructed attenuation memory variables (R_*) and
+  // deviatoric strains onto the device; fix: each copy is now checked so
+  // CUDA errors are reported instead of silently ignored
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xx,b_R_xx,*size_R*sizeof(realw),cudaMemcpyHostToDevice),40301);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_yy,b_R_yy,*size_R*sizeof(realw),cudaMemcpyHostToDevice),40302);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xy,b_R_xy,*size_R*sizeof(realw),cudaMemcpyHostToDevice),40303);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xz,b_R_xz,*size_R*sizeof(realw),cudaMemcpyHostToDevice),40304);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_yz,b_R_yz,*size_R*sizeof(realw),cudaMemcpyHostToDevice),40305);
+
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xx,b_epsilondev_xx,*size_epsilondev*sizeof(realw),cudaMemcpyHostToDevice),40306);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yy,b_epsilondev_yy,*size_epsilondev*sizeof(realw),cudaMemcpyHostToDevice),40307);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xy,b_epsilondev_xy,*size_epsilondev*sizeof(realw),cudaMemcpyHostToDevice),40308);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xz,b_epsilondev_xz,*size_epsilondev*sizeof(realw),cudaMemcpyHostToDevice),40309);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yz,b_epsilondev_yz,*size_epsilondev*sizeof(realw),cudaMemcpyHostToDevice),40310);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  exit_on_cuda_error("after transfer_b_fields_att_to_device");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// attenuation fields
+
+extern "C"
+void FC_FUNC_(transfer_fields_att_from_device,
+ TRANSFER_FIELDS_ATT_FROM_DEVICE)(long* Mesh_pointer,
+ realw* R_xx,realw* R_yy,realw* R_xy,realw* R_xz,realw* R_yz,
+ int* size_R,
+ realw* epsilondev_xx,
+ realw* epsilondev_yy,
+ realw* epsilondev_xy,
+ realw* epsilondev_xz,
+ realw* epsilondev_yz,
+ int* size_epsilondev) {
+  TRACE("transfer_fields_att_from_device");
+  //get mesh pointer out of fortran integer container
+  Mesh* mp = (Mesh*)(*Mesh_pointer);
+
+  // copies attenuation memory variables (R_*) and deviatoric strains back to
+  // the host; fix: each copy is now checked so CUDA errors are reported
+  // instead of silently ignored
+  print_CUDA_error_if_any(cudaMemcpy(R_xx,mp->d_R_xx,*size_R*sizeof(realw),cudaMemcpyDeviceToHost),40311);
+  print_CUDA_error_if_any(cudaMemcpy(R_yy,mp->d_R_yy,*size_R*sizeof(realw),cudaMemcpyDeviceToHost),40312);
+  print_CUDA_error_if_any(cudaMemcpy(R_xy,mp->d_R_xy,*size_R*sizeof(realw),cudaMemcpyDeviceToHost),40313);
+  print_CUDA_error_if_any(cudaMemcpy(R_xz,mp->d_R_xz,*size_R*sizeof(realw),cudaMemcpyDeviceToHost),40314);
+  print_CUDA_error_if_any(cudaMemcpy(R_yz,mp->d_R_yz,*size_R*sizeof(realw),cudaMemcpyDeviceToHost),40315);
+
+  print_CUDA_error_if_any(cudaMemcpy(epsilondev_xx,mp->d_epsilondev_xx,*size_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost),40316);
+  print_CUDA_error_if_any(cudaMemcpy(epsilondev_yy,mp->d_epsilondev_yy,*size_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost),40317);
+  print_CUDA_error_if_any(cudaMemcpy(epsilondev_xy,mp->d_epsilondev_xy,*size_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost),40318);
+  print_CUDA_error_if_any(cudaMemcpy(epsilondev_xz,mp->d_epsilondev_xz,*size_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost),40319);
+  print_CUDA_error_if_any(cudaMemcpy(epsilondev_yz,mp->d_epsilondev_yz,*size_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost),40320);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  exit_on_cuda_error("after transfer_fields_att_from_device");
+#endif
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_kernels_el_to_host,
+ TRANSFER_KERNELS_EL_TO_HOST)(long* Mesh_pointer,
+ realw* h_rho_kl,
+ realw* h_mu_kl,
+ realw* h_kappa_kl,
+ int* NSPEC_AB) {
+TRACE("transfer_kernels_el_to_host");
+  // fortran integer container holds the address of the mesh struct
+  Mesh* mesh = (Mesh*)(*Mesh_pointer);
+  // NGLL3 kernel values per spectral element
+  size_t bytes = (*NSPEC_AB) * NGLL3 * sizeof(realw);
+
+  // copies the accumulated elastic sensitivity kernels back to the host
+  print_CUDA_error_if_any(cudaMemcpy(h_rho_kl,mesh->d_rho_kl,bytes,
+                                     cudaMemcpyDeviceToHost),40101);
+  print_CUDA_error_if_any(cudaMemcpy(h_mu_kl,mesh->d_mu_kl,bytes,
+                                     cudaMemcpyDeviceToHost),40102);
+  print_CUDA_error_if_any(cudaMemcpy(h_kappa_kl,mesh->d_kappa_kl,bytes,
+                                     cudaMemcpyDeviceToHost),40103);
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// for NOISE simulations
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_kernels_noise_to_host,
+ TRANSFER_KERNELS_NOISE_TO_HOST)(long* Mesh_pointer,
+ realw* h_Sigma_kl,
+ int* NSPEC_AB) {
+TRACE("transfer_kernels_noise_to_host");
+
+  // fortran integer container holds the address of the mesh struct
+  Mesh* mesh = (Mesh*)(*Mesh_pointer);
+
+  // noise Sigma kernel: NGLL3 values per spectral element, device -> host
+  print_CUDA_error_if_any(cudaMemcpy(h_Sigma_kl,mesh->d_Sigma_kl,NGLL3*(*NSPEC_AB)*sizeof(realw),
+                                     cudaMemcpyDeviceToHost),40201);
+
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// for ACOUSTIC simulations
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_fields_ac_to_device,
+ TRANSFER_FIELDS_AC_TO_DEVICE)(
+ int* size,
+ realw* potential_acoustic,
+ realw* potential_dot_acoustic,
+ realw* potential_dot_dot_acoustic,
+ long* Mesh_pointer_f) {
+TRACE("transfer_fields_ac_to_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_potential_acoustic,potential_acoustic,
+ sizeof(realw)*(*size),cudaMemcpyHostToDevice),50110);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_potential_dot_acoustic,potential_dot_acoustic,
+ sizeof(realw)*(*size),cudaMemcpyHostToDevice),50120);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_potential_dot_dot_acoustic,potential_dot_dot_acoustic,
+ sizeof(realw)*(*size),cudaMemcpyHostToDevice),50130);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("after transfer_fields_ac_to_device");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_b_fields_ac_to_device,
+ TRANSFER_B_FIELDS_AC_TO_DEVICE)(
+ int* size,
+ realw* b_potential_acoustic,
+ realw* b_potential_dot_acoustic,
+ realw* b_potential_dot_dot_acoustic,
+ long* Mesh_pointer_f) {
+TRACE("transfer_b_fields_ac_to_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_potential_acoustic,b_potential_acoustic,
+ sizeof(realw)*(*size),cudaMemcpyHostToDevice),51110);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_potential_dot_acoustic,b_potential_dot_acoustic,
+ sizeof(realw)*(*size),cudaMemcpyHostToDevice),51120);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_potential_dot_dot_acoustic,b_potential_dot_dot_acoustic,
+ sizeof(realw)*(*size),cudaMemcpyHostToDevice),51130);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("after transfer_b_fields_ac_to_device");
+#endif
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_fields_ac_from_device,TRANSFER_FIELDS_AC_FROM_DEVICE)(
+ int* size,
+ realw* potential_acoustic,
+ realw* potential_dot_acoustic,
+ realw* potential_dot_dot_acoustic,
+ long* Mesh_pointer_f) {
+TRACE("transfer_fields_ac_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(potential_acoustic,mp->d_potential_acoustic,
+ sizeof(realw)*(*size),cudaMemcpyDeviceToHost),52111);
+ print_CUDA_error_if_any(cudaMemcpy(potential_dot_acoustic,mp->d_potential_dot_acoustic,
+ sizeof(realw)*(*size),cudaMemcpyDeviceToHost),52121);
+ print_CUDA_error_if_any(cudaMemcpy(potential_dot_dot_acoustic,mp->d_potential_dot_dot_acoustic,
+ sizeof(realw)*(*size),cudaMemcpyDeviceToHost),52131);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("after transfer_fields_ac_from_device");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_b_fields_ac_from_device,
+ TRANSFER_B_FIELDS_AC_FROM_DEVICE)(
+ int* size,
+ realw* b_potential_acoustic,
+ realw* b_potential_dot_acoustic,
+ realw* b_potential_dot_dot_acoustic,
+ long* Mesh_pointer_f) {
+TRACE("transfer_b_fields_ac_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(b_potential_acoustic,mp->d_b_potential_acoustic,
+ sizeof(realw)*(*size),cudaMemcpyDeviceToHost),53111);
+ print_CUDA_error_if_any(cudaMemcpy(b_potential_dot_acoustic,mp->d_b_potential_dot_acoustic,
+ sizeof(realw)*(*size),cudaMemcpyDeviceToHost),53121);
+ print_CUDA_error_if_any(cudaMemcpy(b_potential_dot_dot_acoustic,mp->d_b_potential_dot_dot_acoustic,
+ sizeof(realw)*(*size),cudaMemcpyDeviceToHost),53131);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("after transfer_b_fields_ac_from_device");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_dot_dot_from_device,
+ TRANSFER_DOT_DOT_FROM_DEVICE)(int* size, realw* potential_dot_dot_acoustic,long* Mesh_pointer_f) {
+
+ TRACE("transfer_dot_dot_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(potential_dot_dot_acoustic,mp->d_potential_dot_dot_acoustic,
+ sizeof(realw)*(*size),cudaMemcpyDeviceToHost),50041);
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_b_dot_dot_from_device,
+ TRANSFER_B_DOT_DOT_FROM_DEVICE)(int* size, realw* b_potential_dot_dot_acoustic,long* Mesh_pointer_f) {
+
+ TRACE("transfer_b_dot_dot_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(b_potential_dot_dot_acoustic,mp->d_b_potential_dot_dot_acoustic,
+ sizeof(realw)*(*size),cudaMemcpyDeviceToHost),50042);
+
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_kernels_ac_to_host,
+ TRANSFER_KERNELS_AC_TO_HOST)(long* Mesh_pointer,
+ realw* h_rho_ac_kl,
+ realw* h_kappa_ac_kl,
+ int* NSPEC_AB) {
+
+ TRACE("transfer_kernels_ac_to_host");
+
+ //get mesh pointer out of fortran integer container
+ Mesh* mp = (Mesh*)(*Mesh_pointer);
+ int size = *NSPEC_AB*NGLL3;
+
+ // copies kernel values over to CPU host
+ print_CUDA_error_if_any(cudaMemcpy(h_rho_ac_kl,mp->d_rho_ac_kl,size*sizeof(realw),
+ cudaMemcpyDeviceToHost),54101);
+ print_CUDA_error_if_any(cudaMemcpy(h_kappa_ac_kl,mp->d_kappa_ac_kl,size*sizeof(realw),
+ cudaMemcpyDeviceToHost),54102);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// for Hess kernel calculations
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_kernels_hess_el_tohost,
+ TRANSFER_KERNELS_HESS_EL_TOHOST)(long* Mesh_pointer,
+ realw* h_hess_kl,
+ int* NSPEC_AB) {
+TRACE("transfer_kernels_hess_el_tohost");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(h_hess_kl,mp->d_hess_el_kl,NGLL3*(*NSPEC_AB)*sizeof(realw),
+ cudaMemcpyDeviceToHost),70201);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_kernels_hess_ac_tohost,
+ TRANSFER_KERNELS_HESS_AC_TOHOST)(long* Mesh_pointer,
+ realw* h_hess_ac_kl,
+ int* NSPEC_AB) {
+ TRACE("transfer_kernels_hess_ac_tohost");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(h_hess_ac_kl,mp->d_hess_ac_kl,NGLL3*(*NSPEC_AB)*sizeof(realw),
+ cudaMemcpyDeviceToHost),70202);
+}
+
+
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/write_seismograms_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/write_seismograms_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/write_seismograms_cuda.cu 2012-02-14 15:11:07 UTC (rev 19622)
@@ -0,0 +1,316 @@
+/*
+ !=====================================================================
+ !
+ ! S p e c f e m 3 D V e r s i o n 2 . 0
+ ! ---------------------------------------
+ !
+ ! Main authors: Dimitri Komatitsch and Jeroen Tromp
+ ! Princeton University, USA and University of Pau / CNRS / INRIA
+ ! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+ ! April 2011
+ !
+ ! This program is free software; you can redistribute it and/or modify
+ ! it under the terms of the GNU General Public License as published by
+ ! the Free Software Foundation; either version 2 of the License, or
+ ! (at your option) any later version.
+ !
+ ! This program is distributed in the hope that it will be useful,
+ ! but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ ! GNU General Public License for more details.
+ !
+ ! You should have received a copy of the GNU General Public License along
+ ! with this program; if not, write to the Free Software Foundation, Inc.,
+ ! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ !
+ !=====================================================================
+ */
+
+#include <stdio.h>
+#include <cuda.h>
+#include <cublas.h>
+
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// ELASTIC simulations
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void transfer_stations_fields_from_device_kernel(int* number_receiver_global,
+ int* ispec_selected_rec,
+ int* ibool,
+ realw* station_seismo_field,
+ realw* desired_field,
+ int nrec_local) {
+ int blockID = blockIdx.x + blockIdx.y*gridDim.x;
+ if(blockID<nrec_local) {
+ int irec = number_receiver_global[blockID]-1;
+ int ispec = ispec_selected_rec[irec]-1;
+ int iglob = ibool[threadIdx.x + NGLL3*ispec]-1;
+
+ station_seismo_field[3*NGLL3*blockID + 3*threadIdx.x+0] = desired_field[3*iglob];
+ station_seismo_field[3*NGLL3*blockID + 3*threadIdx.x+1] = desired_field[3*iglob+1];
+ station_seismo_field[3*NGLL3*blockID + 3*threadIdx.x+2] = desired_field[3*iglob+2];
+ }
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+void transfer_field_from_device(Mesh* mp, realw* d_field,realw* h_field,
+ int* number_receiver_global,
+ int* d_ispec_selected,
+ int* h_ispec_selected,
+ int* ibool) {
+
+TRACE("transfer_field_from_device");
+
+ // checks if anything to do
+ if( mp->nrec_local == 0 ) return;
+
+ int blocksize = NGLL3;
+ int num_blocks_x = mp->nrec_local;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ // prepare field transfer array on device
+ transfer_stations_fields_from_device_kernel<<<grid,threads>>>(mp->d_number_receiver_global,
+ d_ispec_selected,
+ mp->d_ibool,
+ mp->d_station_seismo_field,
+ d_field,
+ mp->nrec_local);
+
+ cudaMemcpy(mp->h_station_seismo_field,mp->d_station_seismo_field,
+ (3*NGLL3)*(mp->nrec_local)*sizeof(realw),cudaMemcpyDeviceToHost);
+
+ int irec_local;
+ for(irec_local=0;irec_local<mp->nrec_local;irec_local++) {
+ int irec = number_receiver_global[irec_local] - 1;
+ int ispec = h_ispec_selected[irec] - 1;
+
+ for(int i=0;i<NGLL3;i++) {
+ int iglob = ibool[i+NGLL3*ispec] - 1;
+ h_field[0+3*iglob] = mp->h_station_seismo_field[0+3*i+irec_local*NGLL3*3];
+ h_field[1+3*iglob] = mp->h_station_seismo_field[1+3*i+irec_local*NGLL3*3];
+ h_field[2+3*iglob] = mp->h_station_seismo_field[2+3*i+irec_local*NGLL3*3];
+ }
+
+ }
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("transfer_field_from_device");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_station_el_from_device,
+ TRANSFER_STATION_EL_FROM_DEVICE)(realw* displ,realw* veloc,realw* accel,
+ realw* b_displ, realw* b_veloc, realw* b_accel,
+ long* Mesh_pointer_f,int* number_receiver_global,
+ int* ispec_selected_rec,int* ispec_selected_source,
+ int* ibool,int* SIMULATION_TYPEf) {
+TRACE("transfer_station_el_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
+ // checks if anything to do
+ if( mp->nrec_local == 0 ) return;
+
+ int SIMULATION_TYPE = *SIMULATION_TYPEf;
+
+ if(SIMULATION_TYPE == 1) {
+ transfer_field_from_device(mp,mp->d_displ,displ, number_receiver_global,
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ transfer_field_from_device(mp,mp->d_veloc,veloc, number_receiver_global,
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ transfer_field_from_device(mp,mp->d_accel,accel, number_receiver_global,
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ }
+ else if(SIMULATION_TYPE == 2) {
+ transfer_field_from_device(mp,mp->d_displ,displ, number_receiver_global,
+ mp->d_ispec_selected_source, ispec_selected_source, ibool);
+ transfer_field_from_device(mp,mp->d_veloc,veloc, number_receiver_global,
+ mp->d_ispec_selected_source, ispec_selected_source, ibool);
+ transfer_field_from_device(mp,mp->d_accel,accel, number_receiver_global,
+ mp->d_ispec_selected_source, ispec_selected_source, ibool);
+ }
+ else if(SIMULATION_TYPE == 3) {
+ transfer_field_from_device(mp,mp->d_b_displ,b_displ, number_receiver_global,
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ transfer_field_from_device(mp,mp->d_b_veloc,b_veloc, number_receiver_global,
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ transfer_field_from_device(mp,mp->d_b_accel,b_accel, number_receiver_global,
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ }
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// ACOUSTIC simulations
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void transfer_stations_fields_acoustic_from_device_kernel(int* number_receiver_global,
+ int* ispec_selected_rec,
+ int* ibool,
+ realw* station_seismo_potential,
+ realw* desired_potential) {
+
+ int blockID = blockIdx.x + blockIdx.y*gridDim.x;
+ int nodeID = threadIdx.x + blockID*blockDim.x;
+
+ int irec = number_receiver_global[blockID]-1;
+ int ispec = ispec_selected_rec[irec]-1;
+ int iglob = ibool[threadIdx.x + NGLL3*ispec]-1;
+
+ //if(threadIdx.x == 0 ) printf("node acoustic: %i %i %i %i %i %e \n",blockID,nodeID,irec,ispec,iglob,desired_potential[iglob]);
+
+ station_seismo_potential[nodeID] = desired_potential[iglob];
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+void transfer_field_acoustic_from_device(Mesh* mp,
+ realw* d_potential,
+ realw* h_potential,
+ int* number_receiver_global,
+ int* d_ispec_selected,
+ int* h_ispec_selected,
+ int* ibool) {
+
+TRACE("transfer_field_acoustic_from_device");
+
+ int irec_local,irec,ispec,iglob,j;
+
+ // checks if anything to do
+ if( mp->nrec_local == 0 ) return;
+
+ // sets up kernel dimensions
+ int blocksize = NGLL3;
+ int num_blocks_x = mp->nrec_local;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ // prepare field transfer array on device
+ transfer_stations_fields_acoustic_from_device_kernel<<<grid,threads>>>(mp->d_number_receiver_global,
+ d_ispec_selected,
+ mp->d_ibool,
+ mp->d_station_seismo_potential,
+ d_potential);
+
+
+ print_CUDA_error_if_any(cudaMemcpy(mp->h_station_seismo_potential,mp->d_station_seismo_potential,
+ mp->nrec_local*NGLL3*sizeof(realw),cudaMemcpyDeviceToHost),500);
+
+ //printf("copy local receivers: %i \n",mp->nrec_local);
+
+ for(irec_local=0; irec_local < mp->nrec_local; irec_local++) {
+ irec = number_receiver_global[irec_local]-1;
+ ispec = h_ispec_selected[irec]-1;
+
+ // copy element values
+ // note: iglob may vary and can be irregularly accessing the h_potential array
+ for(j=0; j < NGLL3; j++){
+ iglob = ibool[j+NGLL3*ispec]-1;
+ h_potential[iglob] = mp->h_station_seismo_potential[j+irec_local*NGLL3];
+ }
+
+ // copy each station element's points to working array
+ // note: this works if iglob values would be all aligned...
+ //memcpy(&(h_potential[iglob]),&(mp->h_station_seismo_potential[irec_local*NGLL3]),NGLL3*sizeof(realw));
+
+ }
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("transfer_field_acoustic_from_device");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_station_ac_from_device,
+ TRANSFER_STATION_AC_FROM_DEVICE)(
+ realw* potential_acoustic,
+ realw* potential_dot_acoustic,
+ realw* potential_dot_dot_acoustic,
+ realw* b_potential_acoustic,
+ realw* b_potential_dot_acoustic,
+ realw* b_potential_dot_dot_acoustic,
+ long* Mesh_pointer_f,
+ int* number_receiver_global,
+ int* ispec_selected_rec,
+ int* ispec_selected_source,
+ int* ibool,
+ int* SIMULATION_TYPEf) {
+
+TRACE("transfer_station_ac_from_device");
+ //double start_time = get_time();
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
+ // checks if anything to do
+ if( mp->nrec_local == 0 ) return;
+
+ int SIMULATION_TYPE = *SIMULATION_TYPEf;
+
+ if(SIMULATION_TYPE == 1) {
+ transfer_field_acoustic_from_device(mp,mp->d_potential_acoustic,potential_acoustic,
+ number_receiver_global,
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ transfer_field_acoustic_from_device(mp,mp->d_potential_dot_acoustic,potential_dot_acoustic,
+ number_receiver_global,
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ transfer_field_acoustic_from_device(mp,mp->d_potential_dot_dot_acoustic,potential_dot_dot_acoustic,
+ number_receiver_global,
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ }
+ else if(SIMULATION_TYPE == 2) {
+ transfer_field_acoustic_from_device(mp,mp->d_potential_acoustic,potential_acoustic,
+ number_receiver_global,
+ mp->d_ispec_selected_source, ispec_selected_source, ibool);
+ transfer_field_acoustic_from_device(mp,mp->d_potential_dot_acoustic,potential_dot_acoustic,
+ number_receiver_global,
+ mp->d_ispec_selected_source, ispec_selected_source, ibool);
+ transfer_field_acoustic_from_device(mp,mp->d_potential_dot_dot_acoustic,potential_dot_dot_acoustic,
+ number_receiver_global,
+ mp->d_ispec_selected_source, ispec_selected_source, ibool);
+ }
+ else if(SIMULATION_TYPE == 3) {
+ transfer_field_acoustic_from_device(mp,mp->d_b_potential_acoustic,b_potential_acoustic,
+ number_receiver_global,
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ transfer_field_acoustic_from_device(mp,mp->d_b_potential_dot_acoustic,b_potential_dot_acoustic,
+ number_receiver_global,
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ transfer_field_acoustic_from_device(mp,mp->d_b_potential_dot_dot_acoustic,b_potential_dot_dot_acoustic,
+ number_receiver_global,
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ //double end_time = get_time();
+ //printf("Elapsed time: %e\n",end_time-start_time);
+ exit_on_cuda_error("transfer_station_ac_from_device");
+#endif
+}
+
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/calc_jacobian.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/calc_jacobian.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/calc_jacobian.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -177,9 +177,13 @@
xgamma*(yxi*zeta-yeta*zxi)
! Check the jacobian
+ ! note: when honoring the moho, we squeeze and stretch elements
+ ! thus, it can happen that with a coarse mesh resolution, the jacobian encounters problems
if(jacobian <= ZERO) then
call xyz_2_rthetaphi_dble(xmesh,ymesh,zmesh,r,theta,phi)
- print*,'r/lat/lon:',r*R_EARTH_KM,90.0-theta*180./PI,phi*180./PI
+ print*,'error jacobian rank:',myrank
+ print*,' location r/lat/lon: ',r*R_EARTH_KM,90.0-theta*180./PI,phi*180./PI
+ print*,' jacobian: ',jacobian
call exit_MPI(myrank,'3D Jacobian undefined in recalc_jacobian_gll3D.f90')
end if
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/model_attenuation.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/model_attenuation.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/model_attenuation.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -34,6 +34,8 @@
! Univeristy of Rhode Island
!
! <savage at uri.edu>.
+! <savage13 at gps.caltech.edu>
+! <savage13 at dtm.ciw.edu>
!
! It is based upon formulation in the following references:
!
@@ -44,23 +46,11 @@
! Velocity dispersion due to anelasticity: implications for seismology and mantle composition
! Geophys, J. R. asts. Soc, Vol 47, pp. 41-58
!
-! The methodology can be found in Brian Savage, Dimitri Komatitsch and Jeroen Tromp,
-! Effects of 3D attenuation on seismic wave amplitude and phase measurements, Bulletin of the Seismological Society of America,
-! vol. 100(3), p. 1241-1251, doi: 10.1785/0120090263 (2010).
+! The methodology can be found in Savage and Tromp, 2006, unpublished
!
-! @ARTICLE{SaKoTr10,
-! author = {Brian Savage and Dimitri Komatitsch and Jeroen Tromp},
-! title = {Effects of {3D} attenuation on seismic wave amplitude and phase measurements},
-! journal = {Bull. Seismol. Soc. Am.},
-! year = {2010},
-! volume = {100},
-! pages = {1241-1251},
-! number = {3},
-! doi = {10.1785/0120090263}}
-!
-!
!--------------------------------------------------------------------------------------------------
+
subroutine model_attenuation_broadcast(myrank,AM_V,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
! standard routine to setup model
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/model_s362ani.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/model_s362ani.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/model_s362ani.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -1102,7 +1102,7 @@
integer :: ncon,nver
-!daniel: original
+! originally defined
!
! real(kind=4) verlat(1)
! real(kind=4) verlon(1)
@@ -1111,7 +1111,7 @@
! integer icon(1)
! real(kind=4) con(1)
-!daniel: avoiding out-of-bounds errors
+! avoiding out-of-bounds errors
real(kind=4) verlat(nver)
real(kind=4) verlon(nver)
real(kind=4) verrad(nver)
@@ -1255,11 +1255,12 @@
call ylm(y,x,lmax,ylmcof(1,ihpa),wk1,wk2,wk3)
else if(itypehpa(ihpa) == 2) then
numcof=numcoe(ihpa)
-!daniel
+! originally called
! call splcon(y,x,numcof,xlaspl(1,ihpa), &
! xlospl(1,ihpa),radspl(1,ihpa), &
! nconpt(ihpa),iconpt(1,ihpa),conpt(1,ihpa))
+! making sure of array bounds
call splcon(y,x,numcof,xlaspl(1:numcof,ihpa), &
xlospl(1:numcof,ihpa),radspl(1:numcof,ihpa), &
nconpt(ihpa),iconpt(1,ihpa),conpt(1,ihpa))
@@ -1416,11 +1417,12 @@
else if(itypehpa(ihpa) == 2) then
numcof=numcoe(ihpa)
-!daniel
+! originally called
! call splcon(y,x,numcof,xlaspl(1,ihpa), &
! xlospl(1,ihpa),radspl(1,ihpa), &
! nconpt(ihpa),iconpt(1,ihpa),conpt(1,ihpa))
+! making sure of array bounds
call splcon(y,x,numcof,xlaspl(1:numcof,ihpa), &
xlospl(1:numcof,ihpa),radspl(1:numcof,ihpa), &
nconpt(ihpa),iconpt(1:maxver,ihpa),conpt(1:maxver,ihpa))
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/broadcast_compute_parameters.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/broadcast_compute_parameters.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/broadcast_compute_parameters.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -317,3 +317,26 @@
endif
end subroutine broadcast_compute_parameters
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine broadcast_gpu_parameters(myrank,GPU_MODE)
+
+ implicit none
+
+! standard include of the MPI library
+ include 'mpif.h'
+ include "constants.h"
+ include "precision.h"
+
+ integer:: myrank
+ logical:: GPU_MODE
+ ! local parameters
+ integer :: ier
+
+ call MPI_BCAST(GPU_MODE,1,MPI_LOGICAL,0,MPI_COMM_WORLD,ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error broadcasting GPU_MODE')
+
+ end subroutine broadcast_gpu_parameters
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/exit_mpi.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/exit_mpi.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/exit_mpi.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -40,10 +40,10 @@
! identifier for error message file
integer, parameter :: IERROR = 30
- integer myrank
+ integer :: myrank
character(len=*) error_msg
- integer ier
+ integer :: ier
character(len=80) outputname
character(len=150) OUTPUT_FILES
@@ -93,7 +93,7 @@
character(len=*) error_msg
- integer ier
+ integer :: ier
! write error message to screen
write(*,*) error_msg(1:len(error_msg))
@@ -105,3 +105,24 @@
end subroutine exit_MPI_without_rank
+!
+!----
+!
+
+ subroutine sync_all()
+
+ implicit none
+
+! standard include of the MPI library
+ include 'mpif.h'
+
+ integer :: ier,rank
+
+ ! gets callers rank
+ call MPI_COMM_RANK(MPI_COMM_WORLD,rank,ier)
+
+ ! synchronizes MPI processes
+ call MPI_BARRIER(MPI_COMM_WORLD,ier)
+ if( ier /= 0 ) call exit_mpi(rank,'error synchronize MPI processes')
+
+ end subroutine sync_all
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/memory_eval.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/memory_eval.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/memory_eval.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -244,7 +244,7 @@
! idoubling_crust_mantle (not needed anymore..)
! static_memory_size = static_memory_size + NSPEC(IREGION_CRUST_MANTLE)*dble(SIZE_INTEGER)
! idoubling_outer_core
- static_memory_size = static_memory_size + NSPEC(IREGION_OUTER_CORE)*dble(SIZE_INTEGER)
+! static_memory_size = static_memory_size + NSPEC(IREGION_OUTER_CORE)*dble(SIZE_INTEGER)
! idoubling_inner_core
static_memory_size = static_memory_size + NSPEC(IREGION_INNER_CORE)*dble(SIZE_INTEGER)
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/read_parameter_file.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/read_parameter_file.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/read_parameter_file.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -192,3 +192,26 @@
end subroutine read_parameter_file
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine read_gpu_mode(GPU_MODE)
+
+ implicit none
+ include "constants.h"
+
+ logical :: GPU_MODE
+
+ ! initializes flags
+ GPU_MODE = .false.
+
+ ! opens file Par_file
+ call open_parameter_file()
+
+ call read_value_logical(GPU_MODE, 'solver.GPU_MODE')
+
+ ! close parameter file
+ call close_parameter_file()
+
+ end subroutine read_gpu_mode
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/save_header_file.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/save_header_file.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/save_header_file.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -134,7 +134,7 @@
endif
write(IOUT,*) '!'
- write(IOUT,*) '! number of processors = ',NPROCTOT
+ write(IOUT,*) '! number of processors = ',NPROCTOT ! should be = NPROC
write(IOUT,*) '!'
write(IOUT,*) '! maximum number of points per region = ',nglob(IREGION_CRUST_MANTLE)
write(IOUT,*) '!'
@@ -147,16 +147,16 @@
write(IOUT,*) '! total points per slice = ',sum(nglob)
write(IOUT,*) '!'
- write(IOUT,*) '! total for full 6-chunk mesh:'
+ write(IOUT,'(1x,a,i1,a)') '! total for full ',NCHUNKS,'-chunk mesh:'
write(IOUT,*) '! ---------------------------'
write(IOUT,*) '!'
write(IOUT,*) '! exact total number of spectral elements in entire mesh = '
- write(IOUT,*) '! ',6.d0*dble(NPROC)*dble(sum(NSPEC)) - subtract_central_cube_elems
+ write(IOUT,*) '! ',dble(NCHUNKS)*dble(NPROC)*dble(sum(NSPEC)) - subtract_central_cube_elems
write(IOUT,*) '! approximate total number of points in entire mesh = '
- write(IOUT,*) '! ',2.d0*dble(NPROC)*(3.d0*dble(sum(nglob))) - subtract_central_cube_points
+ write(IOUT,*) '! ',dble(NCHUNKS)*dble(NPROC)*dble(sum(nglob)) - subtract_central_cube_points
! there are 3 DOFs in solid regions, but only 1 in fluid outer core
write(IOUT,*) '! approximate total number of degrees of freedom in entire mesh = '
- write(IOUT,*) '! ',6.d0*dble(NPROC)*(3.d0*(dble(sum(nglob))) &
+ write(IOUT,*) '! ',dble(NCHUNKS)*dble(NPROC)*(3.d0*(dble(sum(nglob))) &
- 2.d0*dble(nglob(IREGION_OUTER_CORE))) &
- 3.d0*subtract_central_cube_points
write(IOUT,*) '!'
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/Makefile.in
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/Makefile.in 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/Makefile.in 2012-02-14 15:11:07 UTC (rev 19622)
@@ -27,6 +27,23 @@
# @configure_input@
+# CUDA
+ at COND_CUDA_TRUE@CUDA_LIBS = -lcuda -lcudart -lcublas
+ at COND_CUDA_FALSE@CUDA_LIBS =
+
+# with configure: ./configure CUDA_LIB=.. CUDA_INC=.. MPI_INC=..
+CUDA_LIB_LOCATION = @CUDA_LIB@
+CUDA_LINK = $(CUDA_LIB_LOCATION) $(CUDA_LIBS)
+CUDA_INC = @CUDA_INC@ -I../../setup -I../../
+MPI_INC = @MPI_INC@
+
+ at COND_CUDA_TRUE@NVCC = nvcc
+ at COND_CUDA_FALSE@NVCC = g++
+
+ at COND_CUDA_TRUE@NVCC_FLAGS = $(CUDA_INC) $(MPI_INC) -DWITH_MPI -DCUDA -gencode=arch=compute_20,code=sm_20
+ at COND_CUDA_FALSE@NVCC_FLAGS = $(MPI_INC) -DWITH_MPI
+
+
FC = @FC@
FCFLAGS = #@FCFLAGS@
MPIFC = @MPIFC@
@@ -66,100 +83,134 @@
SETUP = ../../setup
## output file directory
OUTPUT = ../../OUTPUT_FILES
+# CUDAD : cuda directory
+CUDAD = ../cuda
#######################################
libspecfem_a_OBJECTS_SOLVER = \
- $O/assemble_MPI_central_cube.o \
- $O/assemble_MPI_scalar.o \
- $O/assemble_MPI_vector.o \
- $O/assemble_MPI_central_cube_block.o \
- $O/assemble_MPI_scalar_block.o \
- $O/assemble_MPI_vector_block.o \
- $O/auto_ner.o \
- $O/broadcast_compute_parameters.o \
- $O/calendar.o \
- $O/comp_source_spectrum.o \
- $O/comp_source_time_function.o \
- $O/compute_adj_source_frechet.o \
- $O/compute_arrays_source.o \
- $O/convert_time.o \
- $O/create_central_cube_buffers.o \
- $O/create_name_database.o \
- $O/count_number_of_sources.o \
- $O/define_derivation_matrices.o \
- $O/euler_angles.o \
- $O/force_ftz.o \
- $O/get_attenuation.o \
- $O/get_backazimuth.o \
- $O/get_cmt.o \
- $O/get_event_info.o \
- $O/get_model_parameters.o \
- $O/get_value_parameters.o \
- $O/gll_library.o \
- $O/hex_nodes.o \
- $O/intgrl.o \
- $O/lagrange_poly.o \
- $O/locate_receivers.o \
- $O/locate_sources.o \
- $O/make_ellipticity.o \
- $O/make_gravity.o \
- $O/model_prem.o \
- $O/model_topo_bathy.o \
- $O/multiply_arrays_source.o \
- $O/param_reader.o \
- $O/spline_routines.o \
- $O/netlib_specfun_erf.o \
- $O/read_arrays_buffers_solver.o \
- $O/read_compute_parameters.o \
- $O/read_parameter_file.o \
- $O/read_value_parameters.o \
- $O/recompute_jacobian.o \
- $O/reduce.o \
- $O/rthetaphi_xyz.o \
- $O/write_c_binary.o \
- $O/write_seismograms.o \
- $O/write_output_ASCII.o \
- $O/write_output_SAC.o \
+ $O/assemble_MPI_scalar.mpicheckno.o \
+ $O/assemble_MPI_vector.mpicheckno.o \
+ $O/assemble_MPI_scalar_block.mpicheckno.o \
+ $O/auto_ner.shared.o \
+ $O/broadcast_compute_parameters.sharedmpi.o \
+ $O/calendar.shared.o \
+ $O/comp_source_spectrum.check.o \
+ $O/comp_source_time_function.check.o \
+ $O/compute_adj_source_frechet.check.o \
+ $O/compute_arrays_source.check.o \
+ $O/convert_time.check.o \
+ $O/create_central_cube_buffers.mpicheck.o \
+ $O/create_name_database.shared.o \
+ $O/count_number_of_sources.shared.o \
+ $O/define_derivation_matrices.check.o \
+ $O/euler_angles.shared.o \
+ $O/force_ftz.cc.o \
+ $O/get_attenuation.check.o \
+ $O/get_backazimuth.check.o \
+ $O/get_cmt.check.o \
+ $O/get_event_info.mpicheck.o \
+ $O/get_model_parameters.shared.o \
+ $O/get_value_parameters.shared.o \
+ $O/gll_library.shared.o \
+ $O/hex_nodes.shared.o \
+ $O/intgrl.shared.o \
+ $O/lagrange_poly.shared.o \
+ $O/locate_receivers.mpicheck.o \
+ $O/locate_sources.mpicheck.o \
+ $O/make_ellipticity.shared.o \
+ $O/make_gravity.shared.o \
+ $O/model_prem.shared.o \
+ $O/model_topo_bathy.sharedmpi.o \
+ $O/multiply_arrays_source.check.o \
+ $O/param_reader.cc.o \
+ $O/spline_routines.shared.o \
+ $O/netlib_specfun_erf.check.o \
+ $O/read_arrays_buffers_solver.mpicheck.o \
+ $O/read_compute_parameters.shared.o \
+ $O/read_parameter_file.shared.o \
+ $O/read_value_parameters.shared.o \
+ $O/recompute_jacobian.check.o \
+ $O/reduce.shared.o \
+ $O/rthetaphi_xyz.shared.o \
+ $O/write_c_binary.cc.o \
+ $O/write_seismograms.mpicheck.o \
+ $O/write_output_ASCII.mpicheck.o \
+ $O/write_output_SAC.mpicheck.o \
$(EMPTY_MACRO)
# solver objects with statically allocated arrays; dependent upon
# values_from_mesher.h
SOLVER_ARRAY_OBJECTS = \
- $O/check_simulation_stability.o \
- $O/compute_add_sources.o \
- $O/compute_boundary_kernel.o \
- $O/compute_coupling.o \
- $O/compute_element.o \
- $O/compute_forces_crust_mantle.o \
- $O/compute_forces_crust_mantle_Dev.o \
- $O/compute_forces_inner_core.o \
- $O/compute_forces_inner_core_Dev.o \
- $O/compute_forces_outer_core.o \
- $O/compute_forces_outer_core_Dev.o \
- $O/compute_kernels.o \
- $O/compute_seismograms.o \
- $O/compute_stacey_crust_mantle.o \
- $O/compute_stacey_outer_core.o \
- $O/fix_non_blocking_flags.o \
- $O/initialize_simulation.o \
- $O/prepare_timerun.o \
- $O/read_arrays_solver.o \
- $O/read_forward_arrays.o \
- $O/read_mesh_databases.o \
- $O/save_forward_arrays.o \
- $O/save_kernels.o \
- $O/setup_sources_receivers.o \
- $O/specfem3D.o \
- $O/write_movie_volume.o \
- $O/write_movie_surface.o \
- $O/noise_tomography.o \
+ $O/specfem3D_par.Solver.o \
+ $O/assemble_MPI_central_cube_block.mpisolver.o \
+ $O/assemble_MPI_central_cube.mpisolver.o \
+ $O/check_simulation_stability.mpisolver.o \
+ $O/compute_add_sources.solver.o \
+ $O/compute_boundary_kernel.solvercheck.o \
+ $O/compute_coupling.solver.o \
+ $O/compute_element.Solver.o \
+ $O/compute_forces_acoustic.Solver.o \
+ $O/compute_forces_elastic.Solver.o \
+ $O/compute_forces_crust_mantle.solver.o \
+ $O/compute_forces_crust_mantle_Dev.Solver.o \
+ $O/compute_forces_inner_core.solver.o \
+ $O/compute_forces_inner_core_Dev.Solver.o \
+ $O/compute_forces_outer_core.solver.o \
+ $O/compute_forces_outer_core_Dev.solver.o \
+ $O/compute_kernels.solver.o \
+ $O/compute_seismograms.solver.o \
+ $O/compute_stacey_crust_mantle.solver.o \
+ $O/compute_stacey_outer_core.solver.o \
+ $O/fix_non_blocking_flags.mpisolvercheck.o \
+ $O/finalize_simulation.mpisolver.o \
+ $O/initialize_simulation.mpisolver.o \
+ $O/iterate_time.mpiSolver.o \
+ $O/noise_tomography.mpisolver.o \
+ $O/prepare_timerun.mpisolver.o \
+ $O/read_arrays_solver.solver.o \
+ $O/read_forward_arrays.solver.o \
+ $O/read_mesh_databases.mpisolver.o \
+ $O/read_topography_bathymetry.mpisolver.o \
+ $O/save_forward_arrays.solver.o \
+ $O/save_kernels.solver.o \
+ $O/setup_GLL_points.mpisolver.o \
+ $O/setup_sources_receivers.mpisolver.o \
+ $O/specfem3D.mpiSolver.o \
+ $O/write_movie_output.mpisolver.o \
+ $O/write_movie_volume.mpisolver.o \
+ $O/write_movie_surface.mpisolver.o \
$(EMPTY_MACRO)
+CUDA_OBJECTS = \
+ $O/assemble_MPI_scalar_cuda.cuda.o \
+ $O/assemble_MPI_vector_cuda.cuda.o \
+ $O/check_fields_cuda.cuda.o \
+ $O/compute_add_sources_acoustic_cuda.cuda.o \
+ $O/compute_add_sources_elastic_cuda.cuda.o \
+ $O/compute_coupling_cuda.cuda.o \
+ $O/compute_forces_crust_mantle_cuda.cuda.o \
+ $O/compute_forces_inner_core_cuda.cuda.o \
+ $O/compute_forces_outer_core_cuda.cuda.o \
+ $O/compute_kernels_cuda.cuda.o \
+ $O/compute_stacey_acoustic_cuda.cuda.o \
+ $O/compute_stacey_elastic_cuda.cuda.o \
+ $O/it_update_displacement_cuda.cuda.o \
+ $O/noise_tomography_cuda.cuda.o \
+ $O/prepare_mesh_constants_cuda.cuda.o \
+ $O/transfer_fields_cuda.cuda.o \
+ $O/write_seismograms_cuda.cuda.o \
+ $O/save_and_compare_cpu_vs_gpu.cudacc.o
+CUDA_STUBS = \
+ $O/specfem3D_gpu_cuda_method_stubs.cudacc.o
+
+
+
LIBSPECFEM_SOLVER = $O/libspecfem_solver.a
+
#######################################
####
@@ -188,11 +239,13 @@
####
# solver also depends on values from mesher
-XSPECFEM_OBJECTS = $(SOLVER_ARRAY_OBJECTS) $O/exit_mpi.o $(LIBSPECFEM_SOLVER)
+ at COND_CUDA_TRUE@XSPECFEM_OBJECTS = $(SOLVER_ARRAY_OBJECTS) $O/exit_mpi.sharedmpi.o $(LIBSPECFEM_SOLVER) $(CUDA_OBJECTS)
+ at COND_CUDA_FALSE@XSPECFEM_OBJECTS = $(SOLVER_ARRAY_OBJECTS) $O/exit_mpi.sharedmpi.o $(LIBSPECFEM_SOLVER) $(CUDA_STUBS)
+
xspecfem3D: $(XSPECFEM_OBJECTS)
## use MPI here
- ${MPIFCCOMPILE_NO_CHECK} -o ${E}/xspecfem3D $(XSPECFEM_OBJECTS) $(MPILIBS)
+ ${MPIFCCOMPILE_NO_CHECK} -o ${E}/xspecfem3D $(XSPECFEM_OBJECTS) $(MPILIBS) $(CUDA_LINK)
reqheader:
(cd ../create_header_file; make)
@@ -220,287 +273,75 @@
#### rule for each .o file below
####
+
##
## shared
##
-$O/auto_ner.o: ${SETUP}/constants.h ${SHARED}/auto_ner.f90
- ${FCCOMPILE_CHECK} -c -o $O/auto_ner.o ${FCFLAGS_f90} ${SHARED}/auto_ner.f90
+$O/%.shared.o: ${SHARED}/%.f90 ${SETUP}/constants.h
+ ${FCCOMPILE_CHECK} ${FCFLAGS_f90} -c -o $@ $<
-$O/broadcast_compute_parameters.o: ${SETUP}/constants.h ${SHARED}/broadcast_compute_parameters.f90
- ${MPIFCCOMPILE_CHECK} -c -o $O/broadcast_compute_parameters.o ${FCFLAGS_f90} ${SHARED}/broadcast_compute_parameters.f90
+$O/%.sharedmpi.o: ${SHARED}/%.f90 ${SETUP}/constants.h
+ ${MPIFCCOMPILE_CHECK} ${FCFLAGS_f90} -c -o $@ $<
-$O/calendar.o: ${SHARED}/calendar.f90
- ${FCCOMPILE_CHECK} -c -o $O/calendar.o ${FCFLAGS_f90} ${SHARED}/calendar.f90
+$O/%.cc.o: ${SHARED}/%.c ${SETUP}/config.h
+ ${CC} -c $(CPPFLAGS) $(CFLAGS) -o $@ $<
-$O/count_number_of_sources.o: ${SETUP}/constants.h ${SHARED}/count_number_of_sources.f90
- ${FCCOMPILE_CHECK} -c -o $O/count_number_of_sources.o ${FCFLAGS_f90} ${SHARED}/count_number_of_sources.f90
+#######################################
-$O/create_name_database.o: ${SETUP}/constants.h ${SHARED}/create_name_database.f90
- ${FCCOMPILE_CHECK} -c -o $O/create_name_database.o ${FCFLAGS_f90} ${SHARED}/create_name_database.f90
-
-$O/create_serial_name_database.o: ${SETUP}/constants.h ${SHARED}/create_serial_name_database.f90
- ${FCCOMPILE_CHECK} -c -o $O/create_serial_name_database.o ${FCFLAGS_f90} ${SHARED}/create_serial_name_database.f90
-
-$O/euler_angles.o: ${SETUP}/constants.h ${SHARED}/euler_angles.f90
- ${FCCOMPILE_CHECK} -c -o $O/euler_angles.o ${FCFLAGS_f90} ${SHARED}/euler_angles.f90
-
-$O/get_model_parameters.o: ${SETUP}/constants.h ${SHARED}/get_model_parameters.f90
- ${FCCOMPILE_CHECK} -c -o $O/get_model_parameters.o ${FCFLAGS_f90} ${SHARED}/get_model_parameters.f90
-
-$O/get_value_parameters.o: ${SETUP}/constants.h ${SHARED}/get_value_parameters.f90
- ${FCCOMPILE_CHECK} -c -o $O/get_value_parameters.o ${FCFLAGS_f90} ${SHARED}/get_value_parameters.f90
-
-$O/gll_library.o: ${SETUP}/constants.h ${SHARED}/gll_library.f90
- ${FCCOMPILE_CHECK} -c -o $O/gll_library.o ${FCFLAGS_f90} ${SHARED}/gll_library.f90
-
-$O/hex_nodes.o: ${SETUP}/constants.h ${SHARED}/hex_nodes.f90
- ${FCCOMPILE_CHECK} -c -o $O/hex_nodes.o ${FCFLAGS_f90} ${SHARED}/hex_nodes.f90
-
-$O/intgrl.o: ${SETUP}/constants.h ${SHARED}/intgrl.f90
- ${FCCOMPILE_CHECK} -c -o $O/intgrl.o ${FCFLAGS_f90} ${SHARED}/intgrl.f90
-
-$O/lagrange_poly.o: ${SETUP}/constants.h ${SHARED}/lagrange_poly.f90
- ${FCCOMPILE_CHECK} -c -o $O/lagrange_poly.o ${FCFLAGS_f90} ${SHARED}/lagrange_poly.f90
-
-$O/make_ellipticity.o: ${SETUP}/constants.h ${SHARED}/make_ellipticity.f90
- ${FCCOMPILE_CHECK} -c -o $O/make_ellipticity.o ${FCFLAGS_f90} ${SHARED}/make_ellipticity.f90
-
-$O/make_gravity.o: ${SETUP}/constants.h ${SHARED}/make_gravity.f90
- ${FCCOMPILE_CHECK} -c -o $O/make_gravity.o ${FCFLAGS_f90} ${SHARED}/make_gravity.f90
-
-$O/memory_eval.o: ${SETUP}/constants.h ${SHARED}/memory_eval.f90
- ${FCCOMPILE_CHECK} -c -o $O/memory_eval.o ${FCFLAGS_f90} ${SHARED}/memory_eval.f90
-
-$O/model_prem.o: ${SETUP}/constants.h ${SHARED}/model_prem.f90
- ${FCCOMPILE_CHECK} -c -o $O/model_prem.o ${FCFLAGS_f90} ${SHARED}/model_prem.f90
-
-### C compilation
-$O/force_ftz.o: ${SHARED}/force_ftz.c ${SETUP}/config.h
- ${CC} -c $(CPPFLAGS) $(CFLAGS) -o $O/force_ftz.o ${SHARED}/force_ftz.c
-
-$O/param_reader.o: ${SHARED}/param_reader.c ${SETUP}/config.h
- ${CC} -c $(CPPFLAGS) $(CFLAGS) -o $O/param_reader.o ${SHARED}/param_reader.c
-
-$O/read_compute_parameters.o: ${SETUP}/constants.h ${SHARED}/read_compute_parameters.f90
- ${FCCOMPILE_CHECK} -c -o $O/read_compute_parameters.o ${FCFLAGS_f90} ${SHARED}/read_compute_parameters.f90
-
-$O/read_parameter_file.o: ${SETUP}/constants.h ${SHARED}/read_parameter_file.f90
- ${FCCOMPILE_CHECK} -c -o $O/read_parameter_file.o ${FCFLAGS_f90} ${SHARED}/read_parameter_file.f90
-
-$O/read_value_parameters.o: ${SETUP}/constants.h ${SHARED}/read_value_parameters.f90
- ${FCCOMPILE_CHECK} -c -o $O/read_value_parameters.o ${FCFLAGS_f90} ${SHARED}/read_value_parameters.f90
-
-$O/reduce.o: ${SETUP}/constants.h ${SHARED}/reduce.f90
- ${FCCOMPILE_CHECK} -c -o $O/reduce.o ${FCFLAGS_f90} ${SHARED}/reduce.f90
-
-$O/rthetaphi_xyz.o: ${SETUP}/constants.h ${SHARED}/rthetaphi_xyz.f90
- ${FCCOMPILE_CHECK} -c -o $O/rthetaphi_xyz.o ${FCFLAGS_f90} ${SHARED}/rthetaphi_xyz.f90
-
-$O/save_header_file.o: ${SETUP}/constants.h ${SHARED}/save_header_file.f90
- ${FCCOMPILE_CHECK} -c -o $O/save_header_file.o ${FCFLAGS_f90} ${SHARED}/save_header_file.f90
-
-$O/spline_routines.o: ${SETUP}/constants.h ${SHARED}/spline_routines.f90
- ${FCCOMPILE_CHECK} -c -o $O/spline_routines.o ${FCFLAGS_f90} ${SHARED}/spline_routines.f90
-
-### C compilation
-$O/write_c_binary.o: ${SHARED}/write_c_binary.c ${SETUP}/config.h
- $(CC) $(CPPFLAGS) $(CFLAGS) -c -o $O/write_c_binary.o ${SHARED}/write_c_binary.c
-
-##
-## shared objects with mpi compilation
-##
-$O/exit_mpi.o: ${SETUP}/constants.h ${SHARED}/exit_mpi.f90
- ${MPIFCCOMPILE_CHECK} -c -o $O/exit_mpi.o ${FCFLAGS_f90} ${SHARED}/exit_mpi.f90
-
-$O/model_topo_bathy.o: ${SETUP}/constants.h ${SHARED}/model_topo_bathy.f90
- ${MPIFCCOMPILE_CHECK} -c -o $O/model_topo_bathy.o ${FCFLAGS_f90} ${SHARED}/model_topo_bathy.f90
-
-
###
### specfem3D - optimized flags and dependence on values from mesher here
###
-$O/compute_add_sources.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_add_sources.f90
- ${FCCOMPILE_NO_CHECK} -c -o $O/compute_add_sources.o ${FCFLAGS_f90} $S/compute_add_sources.f90
+$O/%.solver.o: $S/%.f90 ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h
+ ${FCCOMPILE_NO_CHECK} ${FCFLAGS_f90} -c -o $@ $<
-$O/compute_boundary_kernel.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_boundary_kernel.f90
- ${FCCOMPILE_CHECK} -c -o $O/compute_boundary_kernel.o ${FCFLAGS_f90} $S/compute_boundary_kernel.f90
+$O/%.Solver.o: $S/%.F90 ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h
+ ${FCCOMPILE_NO_CHECK} ${FCFLAGS_f90} -c -o $@ $<
-$O/compute_coupling.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_coupling.f90
- ${FCCOMPILE_NO_CHECK} -c -o $O/compute_coupling.o ${FCFLAGS_f90} $S/compute_coupling.f90
+$O/%.solvercheck.o: $S/%.f90 ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h
+ ${FCCOMPILE_CHECK} ${FCFLAGS_f90} -c -o $@ $<
-$O/compute_element.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_element.F90
- ${FCCOMPILE_NO_CHECK} -c -o $O/compute_element.o ${FCFLAGS_f90} $S/compute_element.F90
+$O/%.mpisolver.o: $S/%.f90 ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h
+ ${MPIFCCOMPILE_NO_CHECK} ${FCFLAGS_f90} -c -o $@ $<
-$O/compute_forces_crust_mantle.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_forces_crust_mantle.f90
- ${FCCOMPILE_NO_CHECK} -c -o $O/compute_forces_crust_mantle.o ${FCFLAGS_f90} $S/compute_forces_crust_mantle.f90
+$O/%.mpiSolver.o: $S/%.F90 ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h
+ ${MPIFCCOMPILE_NO_CHECK} ${FCFLAGS_f90} -c -o $@ $<
-$O/compute_forces_crust_mantle_Dev.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_forces_crust_mantle_Dev.F90
- ${FCCOMPILE_NO_CHECK} -c -o $O/compute_forces_crust_mantle_Dev.o ${FCFLAGS_f90} $S/compute_forces_crust_mantle_Dev.F90
+$O/%.mpisolvercheck.o: $S/%.f90 ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h
+ ${MPIFCCOMPILE_CHECK} ${FCFLAGS_f90} -c -o $@ $<
-$O/compute_forces_outer_core.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_forces_outer_core.f90
- ${FCCOMPILE_NO_CHECK} -c -o $O/compute_forces_outer_core.o ${FCFLAGS_f90} $S/compute_forces_outer_core.f90
+#######################################
-$O/compute_forces_outer_core_Dev.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_forces_outer_core_Dev.f90
- ${FCCOMPILE_NO_CHECK} -c -o $O/compute_forces_outer_core_Dev.o ${FCFLAGS_f90} $S/compute_forces_outer_core_Dev.f90
-
-$O/compute_forces_inner_core.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_forces_inner_core.f90
- ${FCCOMPILE_NO_CHECK} -c -o $O/compute_forces_inner_core.o ${FCFLAGS_f90} $S/compute_forces_inner_core.f90
-
-$O/compute_forces_inner_core_Dev.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_forces_inner_core_Dev.F90
- ${FCCOMPILE_NO_CHECK} -c -o $O/compute_forces_inner_core_Dev.o ${FCFLAGS_f90} $S/compute_forces_inner_core_Dev.F90
-
-$O/compute_kernels.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_kernels.f90
- ${FCCOMPILE_NO_CHECK} -c -o $O/compute_kernels.o ${FCFLAGS_f90} $S/compute_kernels.f90
-
-$O/compute_seismograms.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_seismograms.f90
- ${FCCOMPILE_NO_CHECK} -c -o $O/compute_seismograms.o ${FCFLAGS_f90} $S/compute_seismograms.f90
-
-$O/compute_stacey_crust_mantle.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_stacey_crust_mantle.f90
- ${FCCOMPILE_NO_CHECK} -c -o $O/compute_stacey_crust_mantle.o ${FCFLAGS_f90} $S/compute_stacey_crust_mantle.f90
-
-$O/compute_stacey_outer_core.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_stacey_outer_core.f90
- ${FCCOMPILE_NO_CHECK} -c -o $O/compute_stacey_outer_core.o ${FCFLAGS_f90} $S/compute_stacey_outer_core.f90
-
-$O/read_arrays_solver.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/read_arrays_solver.f90
- ${FCCOMPILE_NO_CHECK} -c -o $O/read_arrays_solver.o ${FCFLAGS_f90} $S/read_arrays_solver.f90
-
-$O/read_forward_arrays.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/read_forward_arrays.f90
- ${FCCOMPILE_NO_CHECK} -c -o $O/read_forward_arrays.o ${FCFLAGS_f90} $S/read_forward_arrays.f90
-
-$O/save_forward_arrays.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/save_forward_arrays.f90
- ${FCCOMPILE_NO_CHECK} -c -o $O/save_forward_arrays.o ${FCFLAGS_f90} $S/save_forward_arrays.f90
-
-$O/save_kernels.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/save_kernels.f90
- ${FCCOMPILE_NO_CHECK} -c -o $O/save_kernels.o ${FCFLAGS_f90} $S/save_kernels.f90
-
-###
-### specfem3D - regular compilation options here
-###
-$O/comp_source_time_function.o: $S/comp_source_time_function.f90
- ${FCCOMPILE_CHECK} -c -o $O/comp_source_time_function.o ${FCFLAGS_f90} $S/comp_source_time_function.f90
-
-$O/comp_source_spectrum.o: ${SETUP}/constants.h $S/comp_source_spectrum.f90
- ${FCCOMPILE_CHECK} -c -o $O/comp_source_spectrum.o ${FCFLAGS_f90} $S/comp_source_spectrum.f90
-
-$O/compute_adj_source_frechet.o: ${SETUP}/constants.h $S/compute_adj_source_frechet.f90
- ${FCCOMPILE_CHECK} -c -o $O/compute_adj_source_frechet.o ${FCFLAGS_f90} $S/compute_adj_source_frechet.f90
-
-$O/compute_arrays_source.o: ${SETUP}/constants.h $S/compute_arrays_source.f90
- ${FCCOMPILE_CHECK} -c -o $O/compute_arrays_source.o ${FCFLAGS_f90} $S/compute_arrays_source.f90
-
-$O/convert_time.o: $S/convert_time.f90
- ${FCCOMPILE_CHECK} -c -o $O/convert_time.o ${FCFLAGS_f90} $S/convert_time.f90
-
-$O/define_derivation_matrices.o: ${SETUP}/constants.h $S/define_derivation_matrices.f90
- ${FCCOMPILE_CHECK} -c -o $O/define_derivation_matrices.o ${FCFLAGS_f90} $S/define_derivation_matrices.f90
-
-$O/get_attenuation.o: ${SETUP}/constants.h $S/get_attenuation.f90
- ${FCCOMPILE_CHECK} -c -o $O/get_attenuation.o ${FCFLAGS_f90} $S/get_attenuation.f90
-
-$O/get_backazimuth.o: ${SETUP}/constants.h $S/get_backazimuth.f90
- ${FCCOMPILE_CHECK} -c -o $O/get_backazimuth.o ${FCFLAGS_f90} $S/get_backazimuth.f90
-
-$O/get_cmt.o: ${SETUP}/constants.h $S/get_cmt.f90
- ${FCCOMPILE_CHECK} -c -o $O/get_cmt.o ${FCFLAGS_f90} $S/get_cmt.f90
-
-$O/multiply_arrays_source.o: ${SETUP}/constants.h $S/multiply_arrays_source.f90
- ${FCCOMPILE_CHECK} -c -o $O/multiply_arrays_source.o ${FCFLAGS_f90} $S/multiply_arrays_source.f90
-
-$O/netlib_specfun_erf.o: $S/netlib_specfun_erf.f90
- ${FCCOMPILE_CHECK} -c -o $O/netlib_specfun_erf.o ${FCFLAGS_f90} $S/netlib_specfun_erf.f90
-
-$O/recompute_jacobian.o: ${SETUP}/constants.h $S/recompute_jacobian.f90
- ${FCCOMPILE_CHECK} -c -o $O/recompute_jacobian.o ${FCFLAGS_f90} $S/recompute_jacobian.f90
-
##
-## specfem3D - use MPI here & dependent on values from mesher here
-##
-$O/assemble_MPI_central_cube.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/assemble_MPI_central_cube.f90
- ${MPIFCCOMPILE_NO_CHECK} -c -o $O/assemble_MPI_central_cube.o ${FCFLAGS_f90} $S/assemble_MPI_central_cube.f90
-
-$O/assemble_MPI_central_cube_block.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/assemble_MPI_central_cube_block.f90
- ${MPIFCCOMPILE_NO_CHECK} -c -o $O/assemble_MPI_central_cube_block.o ${FCFLAGS_f90} $S/assemble_MPI_central_cube_block.f90
-
-$O/check_simulation_stability.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/check_simulation_stability.f90
- ${MPIFCCOMPILE_NO_CHECK} -c -o $O/check_simulation_stability.o ${FCFLAGS_f90} $S/check_simulation_stability.f90
-
-$O/fix_non_blocking_flags.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/fix_non_blocking_flags.f90
- ${MPIFCCOMPILE_CHECK} -c -o $O/fix_non_blocking_flags.o ${FCFLAGS_f90} $S/fix_non_blocking_flags.f90
-
-$O/initialize_simulation.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/initialize_simulation.f90
- ${MPIFCCOMPILE_NO_CHECK} -c -o $O/initialize_simulation.o ${FCFLAGS_f90} $S/initialize_simulation.f90
-
-$O/noise_tomography.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/noise_tomography.f90
- ${MPIFCCOMPILE_NO_CHECK} -c -o $O/noise_tomography.o ${FCFLAGS_f90} $S/noise_tomography.f90
-
-$O/prepare_timerun.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/prepare_timerun.f90
- ${MPIFCCOMPILE_NO_CHECK} -c -o $O/prepare_timerun.o ${FCFLAGS_f90} $S/prepare_timerun.f90
-
-$O/read_mesh_databases.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/read_mesh_databases.f90
- ${MPIFCCOMPILE_NO_CHECK} -c -o $O/read_mesh_databases.o ${FCFLAGS_f90} $S/read_mesh_databases.f90
-
-$O/setup_sources_receivers.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/setup_sources_receivers.f90
- ${MPIFCCOMPILE_NO_CHECK} -c -o $O/setup_sources_receivers.o ${FCFLAGS_f90} $S/setup_sources_receivers.f90
-
-$O/specfem3D.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/specfem3D.F90
- ${MPIFCCOMPILE_NO_CHECK} -c -o $O/specfem3D.o ${FCFLAGS_f90} $S/specfem3D.F90
-
-$O/write_movie_surface.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/write_movie_surface.f90
- ${MPIFCCOMPILE_NO_CHECK} -c -o $O/write_movie_surface.o ${FCFLAGS_f90} $S/write_movie_surface.f90
-
-$O/write_movie_volume.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/write_movie_volume.f90
- ${MPIFCCOMPILE_NO_CHECK} -c -o $O/write_movie_volume.o ${FCFLAGS_f90} $S/write_movie_volume.f90
-
-
-##
## specfem3D - non-dependent on values from mesher here
##
-$O/assemble_MPI_scalar.o: ${SETUP}/constants.h $S/assemble_MPI_scalar.f90
- ${MPIFCCOMPILE_NO_CHECK} -c -o $O/assemble_MPI_scalar.o ${FCFLAGS_f90} $S/assemble_MPI_scalar.f90
+$O/%.check.o: $S/%.f90 ${SETUP}/constants.h
+ ${FCCOMPILE_CHECK} ${FCFLAGS_f90} -c -o $@ $<
-$O/assemble_MPI_scalar_block.o: ${SETUP}/constants.h $S/assemble_MPI_scalar_block.f90
- ${MPIFCCOMPILE_NO_CHECK} -c -o $O/assemble_MPI_scalar_block.o ${FCFLAGS_f90} $S/assemble_MPI_scalar_block.f90
+$O/%.mpicheckno.o: $S/%.f90 ${SETUP}/constants.h
+ ${MPIFCCOMPILE_NO_CHECK} ${FCFLAGS_f90} -c -o $@ $<
-$O/assemble_MPI_vector.o: ${SETUP}/constants.h $S/assemble_MPI_vector.f90
- ${MPIFCCOMPILE_NO_CHECK} -c -o $O/assemble_MPI_vector.o ${FCFLAGS_f90} $S/assemble_MPI_vector.f90
+$O/%.mpicheck.o: $S/%.f90 ${SETUP}/constants.h
+ ${MPIFCCOMPILE_CHECK} -c -o $@ ${FCFLAGS_f90} $<
-$O/assemble_MPI_vector_block.o: ${SETUP}/constants.h $S/assemble_MPI_vector_block.f90
- ${MPIFCCOMPILE_NO_CHECK} -c -o $O/assemble_MPI_vector_block.o ${FCFLAGS_f90} $S/assemble_MPI_vector_block.f90
+#######################################
+
###
-### specfem3D - regular MPI compilation options here
+### CUDA compilation
###
-$O/create_central_cube_buffers.o: ${SETUP}/constants.h $S/create_central_cube_buffers.f90
- ${MPIFCCOMPILE_CHECK} -c -o $O/create_central_cube_buffers.o ${FCFLAGS_f90} $S/create_central_cube_buffers.f90
-$O/get_event_info.o: ${SETUP}/constants.h $S/get_event_info.f90
- ${MPIFCCOMPILE_CHECK} -c -o $O/get_event_info.o ${FCFLAGS_f90} $S/get_event_info.f90
+$O/%.cuda.o: ${CUDAD}/%.cu ${SETUP}/config.h ${CUDAD}/mesh_constants_cuda.h ${CUDAD}/prepare_constants_cuda.h
+ ${NVCC} -c $< -o $@ $(NVCC_FLAGS)
-$O/locate_receivers.o: ${SETUP}/constants.h $S/locate_receivers.f90
- ${MPIFCCOMPILE_CHECK} -c -o $O/locate_receivers.o ${FCFLAGS_f90} $S/locate_receivers.f90
+$O/%.cudacc.o: ${CUDAD}/%.c ${SETUP}/config.h
+	${CC} -c $(CPPFLAGS) $(CFLAGS) $(MPI_INC) -o $@ $<
-$O/locate_sources.o: ${SETUP}/constants.h $S/locate_sources.f90
- ${MPIFCCOMPILE_CHECK} -c -o $O/locate_sources.o ${FCFLAGS_f90} $S/locate_sources.f90
-
-$O/read_arrays_buffers_solver.o: ${SETUP}/constants.h $S/read_arrays_buffers_solver.f90
- ${MPIFCCOMPILE_CHECK} -c -o $O/read_arrays_buffers_solver.o ${FCFLAGS_f90} $S/read_arrays_buffers_solver.f90
-
-$O/write_seismograms.o: ${SETUP}/constants.h $S/write_seismograms.f90
- ${MPIFCCOMPILE_CHECK} -c -o $O/write_seismograms.o ${FCFLAGS_f90} $S/write_seismograms.f90
-
-$O/write_output_ASCII.o: ${SETUP}/constants.h $S/write_output_ASCII.f90
- ${MPIFCCOMPILE_CHECK} -c -o $O/write_output_ASCII.o ${FCFLAGS_f90} $S/write_output_ASCII.f90
-
-$O/write_output_SAC.o: ${SETUP}/constants.h $S/write_output_SAC.f90
- ${MPIFCCOMPILE_CHECK} -c -o $O/write_output_SAC.o ${FCFLAGS_f90} $S/write_output_SAC.f90
-
-
#######################################
-
###
### rule for the header file
###
+
${OUTPUT}/values_from_mesher.h: reqheader
(mkdir -p ${OUTPUT}; cd ${S_TOP}/; ./bin/xcreate_header_file)
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_central_cube.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_central_cube.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_central_cube.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -26,119 +26,100 @@
!=====================================================================
subroutine assemble_MPI_central_cube(ichunk,nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
- npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
- receiver_cube_from_slices,ibool_inner_core,idoubling_inner_core, &
- ibelm_bottom_inner_core,NSPEC2D_BOTTOM_INNER_CORE,vector_assemble,ndim_assemble,iphase_CC)
+ npoin2D_cube_from_slices, &
+ buffer_all_cube_from_slices,buffer_slices, &
+ request_send_cc,request_receive_cc, &
+ request_send_array_cc,request_receive_array_cc, &
+ ibool_central_cube, &
+ receiver_cube_from_slices,ibool_inner_core,idoubling_inner_core, &
+ ibelm_bottom_inner_core,NSPEC2D_BOTTOM_INNER_CORE, &
+ vector_assemble,ndim_assemble, &
+ iphase_comm_CC)
implicit none
-! standard include of the MPI library
+ ! standard include of the MPI library
include 'mpif.h'
include 'constants.h'
-! include values created by the mesher
+ ! include values created by the mesher
include "OUTPUT_FILES/values_from_mesher.h"
-! for matching with central cube in inner core
+ ! for matching with central cube in inner core
integer, intent(in) :: ichunk, nb_msgs_theor_in_cube, npoin2D_cube_from_slices
integer, intent(in) :: ndim_assemble
integer, intent(in) :: receiver_cube_from_slices
- integer, intent(inout) :: iphase_CC
+ integer, intent(inout) :: iphase_comm_CC
integer, dimension(nb_msgs_theor_in_cube), intent(in) :: sender_from_slices_to_cube
+
double precision, dimension(npoin2D_cube_from_slices,ndim_assemble), intent(inout) :: buffer_slices
double precision, dimension(npoin2D_cube_from_slices,ndim_assemble,nb_msgs_theor_in_cube), intent(inout) :: &
- buffer_all_cube_from_slices
+ buffer_all_cube_from_slices
+
+ ! note: these parameters are "saved" now as global parameters
+ ! MPI status of messages to be received
+ integer, intent(inout) :: request_send_cc,request_receive_cc
+ ! maximum value of nb_msgs_theor_in_cube is 5 (when NPROC_XI == 1)
+ ! therefore NPROC_XI+4 is always large enough
+ integer, dimension(NPROC_XI_VAL+4), intent(inout) :: request_send_array_cc,request_receive_array_cc
+
integer, dimension(nb_msgs_theor_in_cube,npoin2D_cube_from_slices), intent(in) :: ibool_central_cube
-! local to global mapping
+ ! local to global mapping
integer, intent(in) :: NSPEC2D_BOTTOM_INNER_CORE
integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE), intent(in) :: ibool_inner_core
integer, dimension(NSPEC_INNER_CORE), intent(in) :: idoubling_inner_core
integer, dimension(NSPEC2D_BOTTOM_INNER_CORE), intent(in) :: ibelm_bottom_inner_core
-! vector
+ ! vector
real(kind=CUSTOM_REAL), dimension(ndim_assemble,NGLOB_INNER_CORE), intent(inout) :: vector_assemble
+! local parameters
+
integer ipoin,idimension, ispec2D, ispec
integer i,j,k
integer sender,receiver,imsg
real(kind=CUSTOM_REAL), dimension(NGLOB_INNER_CORE) :: array_central_cube
-! MPI status of messages to be received
- integer, save :: request_send,request_receive
-! maximum value of nb_msgs_theor_in_cube is 5 (when NPROC_XI == 1)
-! therefore NPROC_XI+4 is always large enough
- integer, dimension(NPROC_XI_VAL+4), save :: request_send_array,request_receive_array
logical :: flag_result_test
integer, dimension(MPI_STATUS_SIZE) :: msg_status
integer :: ier
-! mask
+ ! mask
logical, dimension(NGLOB_INNER_CORE) :: mask
-!---
-!--- use buffers to assemble mass matrix with central cube once and for all
-!---
+ !---
+ !--- use buffers to assemble mass matrix with central cube once and for all
+ !---
- if(iphase_CC == 1) then
+ select case( iphase_comm_CC )
-! on chunks AB and AB_ANTIPODE, receive all the messages from slices
- if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
- do imsg = 1,nb_msgs_theor_in_cube-1
-! receive buffers from slices
- sender = sender_from_slices_to_cube(imsg)
- call MPI_IRECV(buffer_all_cube_from_slices(1,1,imsg), &
- ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,sender, &
- itag,MPI_COMM_WORLD,request_receive_array(imsg),ier)
- enddo
- endif
+ case( 1 )
-! send info to central cube from all the slices except those in CHUNK_AB & CHUNK_AB_ANTIPODE
- if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
-! for bottom elements in contact with central cube from the slices side
- ipoin = 0
- do ispec2D = 1,NSPEC2D_BOTTOM_INNER_CORE
- ispec = ibelm_bottom_inner_core(ispec2D)
-! only for DOFs exactly on surface of central cube (bottom of these elements)
- k = 1
- do j = 1,NGLLY
- do i = 1,NGLLX
- ipoin = ipoin + 1
- buffer_slices(ipoin,:) = dble(vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)))
- enddo
- enddo
- enddo
-! send buffer to central cube
- receiver = receiver_cube_from_slices
- call MPI_ISEND(buffer_slices,ndim_assemble*npoin2D_cube_from_slices, &
- MPI_DOUBLE_PRECISION,receiver,itag,MPI_COMM_WORLD,request_send,ier)
- endif ! end sending info to central cube
+ ! non-central-cube chunks send values to receiver central cube chunks AB or AB_ANTIPODE
- iphase_CC = iphase_CC + 1
- return ! exit because we have started some communications therefore we need some time
+ ! note: only chunks AB and AB_ANTIPODE contain valid central cube elements,
+ ! all other have only fictitious central cube elements
- endif !!!!!!!!! end of iphase_CC 1
+ ! on chunks AB and AB_ANTIPODE, receive all the messages from slices
+ if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+ do imsg = 1,nb_msgs_theor_in_cube-1
+ ! receive buffers from slices
+ sender = sender_from_slices_to_cube(imsg)
+ call MPI_IRECV(buffer_all_cube_from_slices(1,1,imsg), &
+ ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,sender, &
+ itag,MPI_COMM_WORLD,request_receive_array_cc(imsg),ier)
+ enddo
+ endif
- if(iphase_CC == 2) then
-
- if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
- call MPI_TEST(request_send,flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not sent yet
- endif
-
- if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
- do imsg = 1,nb_msgs_theor_in_cube-1
- call MPI_TEST(request_receive_array(imsg),flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not received yet
- enddo
- endif
-
-! exchange of their bottom faces between chunks AB and AB_ANTIPODE
- if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
- ipoin = 0
- do ispec = NSPEC_INNER_CORE, 1, -1
- if (idoubling_inner_core(ispec) == IFLAG_BOTTOM_CENTRAL_CUBE) then
+ ! send info to central cube from all the slices except those in CHUNK_AB & CHUNK_AB_ANTIPODE
+ if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
+ ! for bottom elements in contact with central cube from the slices side
+ ipoin = 0
+ do ispec2D = 1,NSPEC2D_BOTTOM_INNER_CORE
+ ispec = ibelm_bottom_inner_core(ispec2D)
+ ! only for DOFs exactly on surface of central cube (bottom of these elements)
k = 1
do j = 1,NGLLY
do i = 1,NGLLX
@@ -146,183 +127,226 @@
buffer_slices(ipoin,:) = dble(vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)))
enddo
enddo
- endif
- enddo
- sender = sender_from_slices_to_cube(nb_msgs_theor_in_cube)
-! call MPI_SENDRECV(buffer_slices,ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,receiver_cube_from_slices, &
-! itag,buffer_slices2,ndim_assemble*npoin2D_cube_from_slices,&
-! MPI_DOUBLE_PRECISION,sender,itag,MPI_COMM_WORLD,msg_status,ier)
+ enddo
+ ! send buffer to central cube
+ receiver = receiver_cube_from_slices
+ call MPI_ISEND(buffer_slices,ndim_assemble*npoin2D_cube_from_slices, &
+ MPI_DOUBLE_PRECISION,receiver,itag,MPI_COMM_WORLD,request_send_cc,ier)
+ endif ! end sending info to central cube
- call MPI_IRECV(buffer_all_cube_from_slices(1,1,nb_msgs_theor_in_cube), &
- ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,sender,itag,MPI_COMM_WORLD,request_receive,ier)
-!! DK DK this merged with previous statement
-! buffer_all_cube_from_slices(:,:,nb_msgs_theor_in_cube) = buffer_slices2(:,:)
+ iphase_comm_CC = iphase_comm_CC + 1
+ return ! exit because we have started some communications therefore we need some time
- call MPI_ISEND(buffer_slices,ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,receiver_cube_from_slices, &
- itag,MPI_COMM_WORLD,request_send,ier)
- endif
+ case( 2 )
- iphase_CC = iphase_CC + 1
- return ! exit because we have started some communications therefore we need some time
+ ! central cube chunks AB and AB_ANTIPODE send values to each other
- endif !!!!!!!!! end of iphase_CC 2
+ ! checks that chunks have sent out messages
+ if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
+ call MPI_TEST(request_send_cc,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not sent yet
+ endif
- if(iphase_CC == 3) then
-
-!--- now we need to assemble the contributions
-
- if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
-
- call MPI_TEST(request_send,flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not sent yet
- call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not received yet
-
- do idimension = 1,ndim_assemble
-! erase contributions to central cube array
- array_central_cube(:) = 0._CUSTOM_REAL
-
-! use indirect addressing to store contributions only once
-! distinguish between single and double precision for reals
+ ! checks that central cube chunks have received all (requested) messages
+ if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
do imsg = 1,nb_msgs_theor_in_cube-1
- do ipoin = 1,npoin2D_cube_from_slices
- if(CUSTOM_REAL == SIZE_REAL) then
- array_central_cube(ibool_central_cube(imsg,ipoin)) = sngl(buffer_all_cube_from_slices(ipoin,idimension,imsg))
- else
- array_central_cube(ibool_central_cube(imsg,ipoin)) = buffer_all_cube_from_slices(ipoin,idimension,imsg)
- endif
- enddo
+ call MPI_TEST(request_receive_array_cc(imsg),flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not received yet
enddo
-! add the constribution of AB or AB_ANTIPODE to sum with the external slices on the edges
-! use a mask to avoid taking the same point into account several times.
- mask(:) = .false.
- do ipoin = 1,npoin2D_cube_from_slices
- if (.not. mask(ibool_central_cube(nb_msgs_theor_in_cube,ipoin))) then
- if(CUSTOM_REAL == SIZE_REAL) then
- array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) = &
- array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) + &
- sngl(buffer_all_cube_from_slices(ipoin,idimension,nb_msgs_theor_in_cube))
- else
- array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) = &
- array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) + &
- buffer_all_cube_from_slices(ipoin,idimension,nb_msgs_theor_in_cube)
- endif
- mask(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) = .true.
- endif
- enddo
+ endif
-! suppress degrees of freedom already assembled at top of cube on edges
- do ispec = 1,NSPEC_INNER_CORE
- if(idoubling_inner_core(ispec) == IFLAG_TOP_CENTRAL_CUBE) then
- k = NGLLZ
+ ! exchange of their bottom faces between chunks AB and AB_ANTIPODE
+ if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+ ipoin = 0
+ do ispec = NSPEC_INNER_CORE, 1, -1
+ if (idoubling_inner_core(ispec) == IFLAG_BOTTOM_CENTRAL_CUBE) then
+ k = 1
do j = 1,NGLLY
do i = 1,NGLLX
- array_central_cube(ibool_inner_core(i,j,k,ispec)) = 0._CUSTOM_REAL
+ ipoin = ipoin + 1
+ buffer_slices(ipoin,:) = dble(vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)))
enddo
enddo
endif
enddo
+ sender = sender_from_slices_to_cube(nb_msgs_theor_in_cube)
+ ! call MPI_SENDRECV(buffer_slices,ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,receiver_cube_from_slices, &
+ ! itag,buffer_slices2,ndim_assemble*npoin2D_cube_from_slices,&
+ ! MPI_DOUBLE_PRECISION,sender,itag,MPI_COMM_WORLD,msg_status,ier)
-! assemble contributions
- vector_assemble(idimension,:) = vector_assemble(idimension,:) + array_central_cube(:)
+ call MPI_IRECV(buffer_all_cube_from_slices(1,1,nb_msgs_theor_in_cube), &
+ ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,sender,itag,MPI_COMM_WORLD,request_receive_cc,ier)
+ !! DK DK this merged with previous statement
+ ! buffer_all_cube_from_slices(:,:,nb_msgs_theor_in_cube) = buffer_slices2(:,:)
-! copy sum back
- do imsg = 1,nb_msgs_theor_in_cube-1
+ call MPI_ISEND(buffer_slices,ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,receiver_cube_from_slices, &
+ itag,MPI_COMM_WORLD,request_send_cc,ier)
+ endif
+
+ iphase_comm_CC = iphase_comm_CC + 1
+ return ! exit because we have started some communications therefore we need some time
+
+ case( 3 )
+
+ !--- now we need to assemble the contributions
+
+ ! central cube chunks AB and AB_ANTIPODE assemble values and send them out to others
+
+ if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+
+ ! checks that messages between AB and AB_ANTIPODE have been sent and received
+ call MPI_TEST(request_send_cc,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not sent yet
+ call MPI_TEST(request_receive_cc,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not received yet
+
+ do idimension = 1,ndim_assemble
+ ! erase contributions to central cube array
+ array_central_cube(:) = 0._CUSTOM_REAL
+
+ ! use indirect addressing to store contributions only once
+ ! distinguish between single and double precision for reals
+ do imsg = 1,nb_msgs_theor_in_cube-1
+ do ipoin = 1,npoin2D_cube_from_slices
+ if(CUSTOM_REAL == SIZE_REAL) then
+ array_central_cube(ibool_central_cube(imsg,ipoin)) = sngl(buffer_all_cube_from_slices(ipoin,idimension,imsg))
+ else
+ array_central_cube(ibool_central_cube(imsg,ipoin)) = buffer_all_cube_from_slices(ipoin,idimension,imsg)
+ endif
+ enddo
+ enddo
+ ! add the constribution of AB or AB_ANTIPODE to sum with the external slices on the edges
+ ! use a mask to avoid taking the same point into account several times.
+ mask(:) = .false.
do ipoin = 1,npoin2D_cube_from_slices
- buffer_all_cube_from_slices(ipoin,idimension,imsg) = vector_assemble(idimension,ibool_central_cube(imsg,ipoin))
+ if (.not. mask(ibool_central_cube(nb_msgs_theor_in_cube,ipoin))) then
+ if(CUSTOM_REAL == SIZE_REAL) then
+ array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) = &
+ array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) + &
+ sngl(buffer_all_cube_from_slices(ipoin,idimension,nb_msgs_theor_in_cube))
+ else
+ array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) = &
+ array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) + &
+ buffer_all_cube_from_slices(ipoin,idimension,nb_msgs_theor_in_cube)
+ endif
+ mask(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) = .true.
+ endif
enddo
+
+ ! suppress degrees of freedom already assembled at top of cube on edges
+ do ispec = 1,NSPEC_INNER_CORE
+ if(idoubling_inner_core(ispec) == IFLAG_TOP_CENTRAL_CUBE) then
+ k = NGLLZ
+ do j = 1,NGLLY
+ do i = 1,NGLLX
+ array_central_cube(ibool_inner_core(i,j,k,ispec)) = 0._CUSTOM_REAL
+ enddo
+ enddo
+ endif
+ enddo
+
+ ! assemble contributions
+ vector_assemble(idimension,:) = vector_assemble(idimension,:) + array_central_cube(:)
+
+ ! copy sum back
+ do imsg = 1,nb_msgs_theor_in_cube-1
+ do ipoin = 1,npoin2D_cube_from_slices
+ buffer_all_cube_from_slices(ipoin,idimension,imsg) = vector_assemble(idimension,ibool_central_cube(imsg,ipoin))
+ enddo
+ enddo
+
enddo
- enddo
+ endif
- endif
+ !----------
-!----------
+ ! receive info from central cube on all the slices except those in CHUNK_AB & CHUNK_AB_ANTIPODE
+ if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
+ ! receive buffers from slices
+ sender = receiver_cube_from_slices
+ call MPI_IRECV(buffer_slices, &
+ ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,sender, &
+ itag,MPI_COMM_WORLD,request_receive_cc,ier)
+ ! for bottom elements in contact with central cube from the slices side
+ ! ipoin = 0
+ ! do ispec2D = 1,NSPEC2D_BOTTOM_INNER_CORE
+ ! ispec = ibelm_bottom_inner_core(ispec2D)
+ ! only for DOFs exactly on surface of central cube (bottom of these elements)
+ ! k = 1
+ ! do j = 1,NGLLY
+ ! do i = 1,NGLLX
+ ! ipoin = ipoin + 1
+ ! distinguish between single and double precision for reals
+ ! if(CUSTOM_REAL == SIZE_REAL) then
+ ! vector_assemble(:,ibool_inner_core(i,j,k,ispec)) = sngl(buffer_slices(ipoin,:))
+ ! else
+ ! vector_assemble(:,ibool_inner_core(i,j,k,ispec)) = buffer_slices(ipoin,:)
+ ! endif
+ ! enddo
+ ! enddo
+ ! enddo
+ endif ! end receiving info from central cube
-! receive info from central cube on all the slices except those in CHUNK_AB & CHUNK_AB_ANTIPODE
- if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
-! receive buffers from slices
- sender = receiver_cube_from_slices
- call MPI_IRECV(buffer_slices, &
- ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,sender, &
- itag,MPI_COMM_WORLD,request_receive,ier)
-! for bottom elements in contact with central cube from the slices side
-! ipoin = 0
-! do ispec2D = 1,NSPEC2D_BOTTOM_INNER_CORE
-! ispec = ibelm_bottom_inner_core(ispec2D)
-! only for DOFs exactly on surface of central cube (bottom of these elements)
-! k = 1
-! do j = 1,NGLLY
-! do i = 1,NGLLX
-! ipoin = ipoin + 1
-! distinguish between single and double precision for reals
-! if(CUSTOM_REAL == SIZE_REAL) then
-! vector_assemble(:,ibool_inner_core(i,j,k,ispec)) = sngl(buffer_slices(ipoin,:))
-! else
-! vector_assemble(:,ibool_inner_core(i,j,k,ispec)) = buffer_slices(ipoin,:)
-! endif
-! enddo
-! enddo
-! enddo
- endif ! end receiving info from central cube
+ !------- send info back from central cube to slices
-!------- send info back from central cube to slices
+ ! on chunk AB & CHUNK_AB_ANTIPODE, send all the messages to slices
+ if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+ do imsg = 1,nb_msgs_theor_in_cube-1
+ ! send buffers to slices
+ receiver = sender_from_slices_to_cube(imsg)
+ call MPI_ISEND(buffer_all_cube_from_slices(1,1,imsg),ndim_assemble*npoin2D_cube_from_slices, &
+ MPI_DOUBLE_PRECISION,receiver,itag,MPI_COMM_WORLD,request_send_array_cc(imsg),ier)
+ enddo
+ endif
-! on chunk AB & CHUNK_AB_ANTIPODE, send all the messages to slices
- if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
- do imsg = 1,nb_msgs_theor_in_cube-1
-! send buffers to slices
- receiver = sender_from_slices_to_cube(imsg)
- call MPI_ISEND(buffer_all_cube_from_slices(1,1,imsg),ndim_assemble*npoin2D_cube_from_slices, &
- MPI_DOUBLE_PRECISION,receiver,itag,MPI_COMM_WORLD,request_send_array(imsg),ier)
- enddo
- endif
+ iphase_comm_CC = iphase_comm_CC + 1
+ return ! exit because we have started some communications therefore we need some time
- iphase_CC = iphase_CC + 1
- return ! exit because we have started some communications therefore we need some time
+ case( 4 )
- endif !!!!!!!!! end of iphase_CC 3
+ ! all non-central cube chunks set the values at the common points with central cube
- if(iphase_CC == 4) then
+ ! checks that messages were sent out by central cube chunks AB and AB_ANTIPODE
+ if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+ do imsg = 1,nb_msgs_theor_in_cube-1
+ call MPI_TEST(request_send_array_cc(imsg),flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not sent yet
+ enddo
+ endif
- if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
- do imsg = 1,nb_msgs_theor_in_cube-1
- call MPI_TEST(request_send_array(imsg),flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not sent yet
- enddo
- endif
+ ! checks that messages have been received
+ if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
+ call MPI_TEST(request_receive_cc,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not received yet
+ endif
- if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
- call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not received yet
- endif
-
-! receive info from central cube on all the slices except those in CHUNK_AB & CHUNK_AB_ANTIPODE
- if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
-! for bottom elements in contact with central cube from the slices side
- ipoin = 0
- do ispec2D = 1,NSPEC2D_BOTTOM_INNER_CORE
- ispec = ibelm_bottom_inner_core(ispec2D)
-! only for DOFs exactly on surface of central cube (bottom of these elements)
- k = 1
- do j = 1,NGLLY
- do i = 1,NGLLX
- ipoin = ipoin + 1
-! distinguish between single and double precision for reals
- if(CUSTOM_REAL == SIZE_REAL) then
- vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)) = sngl(buffer_slices(ipoin,:))
- else
- vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)) = buffer_slices(ipoin,:)
- endif
+ ! receive info from central cube on all the slices except those in CHUNK_AB & CHUNK_AB_ANTIPODE
+ if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
+ ! for bottom elements in contact with central cube from the slices side
+ ipoin = 0
+ do ispec2D = 1,NSPEC2D_BOTTOM_INNER_CORE
+ ispec = ibelm_bottom_inner_core(ispec2D)
+ ! only for DOFs exactly on surface of central cube (bottom of these elements)
+ k = 1
+ do j = 1,NGLLY
+ do i = 1,NGLLX
+ ipoin = ipoin + 1
+ ! distinguish between single and double precision for reals
+ if(CUSTOM_REAL == SIZE_REAL) then
+ vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)) = sngl(buffer_slices(ipoin,:))
+ else
+ vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)) = buffer_slices(ipoin,:)
+ endif
+ enddo
enddo
enddo
- enddo
- endif ! end receiving info from central cube
+ endif ! end receiving info from central cube
-! this is the exit condition, to go beyond the last phase number
- iphase_CC = iphase_CC + 1
+ ! this is the exit condition, to go beyond the last phase number
+ iphase_comm_CC = iphase_comm_CC + 1
- endif !!!!!!!!! end of iphase_CC 4
+ end select
end subroutine assemble_MPI_central_cube
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_central_cube_block.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_central_cube_block.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_central_cube_block.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -26,9 +26,13 @@
!=====================================================================
subroutine assemble_MPI_central_cube_block(ichunk,nb_msgs_theor_in_cube, sender_from_slices_to_cube, &
- npoin2D_cube_from_slices, buffer_all_cube_from_slices, buffer_slices, buffer_slices2, ibool_central_cube, &
- receiver_cube_from_slices, ibool_inner_core, idoubling_inner_core, NSPEC_INNER_CORE, &
- ibelm_bottom_inner_core, NSPEC2D_BOTTOM_INNER_CORE,NGLOB_INNER_CORE,vector_assemble,ndim_assemble)
+ npoin2D_cube_from_slices, &
+ buffer_all_cube_from_slices, buffer_slices, buffer_slices2, &
+ ibool_central_cube, &
+ receiver_cube_from_slices, ibool_inner_core, &
+ idoubling_inner_core, NSPEC_INNER_CORE, &
+ ibelm_bottom_inner_core, NSPEC2D_BOTTOM_INNER_CORE,NGLOB_INNER_CORE, &
+ vector_assemble,ndim_assemble)
! this version of the routine is based on blocking MPI calls
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_scalar.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_scalar.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_scalar.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -36,11 +36,15 @@
iboolfaces,iboolcorner, &
iprocfrom_faces,iprocto_faces, &
iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces_scalar,buffer_received_faces_scalar,npoin2D_max_all_CM_IC, &
+ buffer_send_faces_scalar,buffer_received_faces_scalar, &
+ npoin2D_max_all_CM_IC, &
buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
+ request_send,request_receive, &
+ request_send_array,request_receive_array, &
NUMMSGS_FACES,NCORNERSCHUNKS, &
NPROC_XI,NPROC_ETA,NGLOB1D_RADIAL, &
- NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX,NGLOB2DMAX_XY,NCHUNKS,iphase)
+ NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX,NGLOB2DMAX_XY,NCHUNKS, &
+ iphase_comm)
implicit none
@@ -50,7 +54,7 @@
include "constants.h"
include "precision.h"
- integer myrank,nglob,NCHUNKS,iphase
+ integer myrank,nglob,NCHUNKS,iphase_comm
! array to assemble
real(kind=CUSTOM_REAL), dimension(nglob), intent(inout) :: array_val
@@ -72,44 +76,49 @@
! indirect addressing for each corner of the chunks
integer, dimension(NGLOB1D_RADIAL,NUMCORNERS_SHARED), intent(in) :: iboolcorner
- integer icount_corners
integer, intent(in) :: npoin2D_max_all_CM_IC
integer, dimension(NGLOB2DMAX_XY,NUMFACES_SHARED), intent(in) :: iboolfaces
- real(kind=CUSTOM_REAL), dimension(npoin2D_max_all_CM_IC,NUMFACES_SHARED), intent(inout) :: buffer_send_faces_scalar, &
- buffer_received_faces_scalar
+ real(kind=CUSTOM_REAL), dimension(npoin2D_max_all_CM_IC,NUMFACES_SHARED), intent(inout) :: &
+ buffer_send_faces_scalar, &
+ buffer_received_faces_scalar
-! buffers for send and receive between corners of the chunks
+ ! buffers for send and receive between corners of the chunks
real(kind=CUSTOM_REAL), dimension(NGLOB1D_RADIAL), intent(inout) :: buffer_send_chunkcorn_scalar, &
buffer_recv_chunkcorn_scalar
-! ---- arrays to assemble between chunks
+ ! ---- arrays to assemble between chunks
-! communication pattern for faces between chunks
+ ! communication pattern for faces between chunks
integer, dimension(NUMMSGS_FACES), intent(in) :: iprocfrom_faces,iprocto_faces
-! communication pattern for corners between chunks
+ ! communication pattern for corners between chunks
integer, dimension(NCORNERSCHUNKS), intent(in) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
-! MPI status of messages to be received
+ ! stored as global parameters
+ integer, intent(inout) :: request_send,request_receive
+ integer, dimension(NUMFACES_SHARED), intent(inout) :: request_send_array,request_receive_array
+
+! local parameters
+
+ ! MPI status of messages to be received
integer, dimension(MPI_STATUS_SIZE) :: msg_status
integer :: ipoin,ipoin2D,ipoin1D
integer :: sender,receiver
integer :: imsg
integer :: icount_faces,npoin2D_chunks
+ integer :: icount_corners
integer :: ier
-! do not remove the "save" statement because this routine is non blocking
- integer, save :: request_send,request_receive
- integer, dimension(NUMFACES_SHARED), save :: request_send_array,request_receive_array
logical :: flag_result_test
+
! $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
! check flag to see if we need to assemble (might be turned off when debugging)
if (.not. ACTUALLY_ASSEMBLE_MPI_SLICES) then
- iphase = 9999 ! this means everything is finished
+ iphase_comm = 9999 ! this means everything is finished
return
endif
@@ -123,452 +132,716 @@
!---- first assemble along xi using the 2-D topology
!----
- if(iphase == 1) then
+ if(iphase_comm == 1) then
-! slices copy the right face into the buffer
- do ipoin=1,npoin2D_xi(2)
- buffer_send_faces_scalar(ipoin,1) = array_val(iboolright_xi(ipoin))
- enddo
+ ! slices copy the right face into the buffer
+ do ipoin=1,npoin2D_xi(2)
+ buffer_send_faces_scalar(ipoin,1) = array_val(iboolright_xi(ipoin))
+ enddo
-! send messages forward along each row
- if(iproc_xi == 0) then
- sender = MPI_PROC_NULL
- else
- sender = addressing(ichunk,iproc_xi - 1,iproc_eta)
- endif
- if(iproc_xi == NPROC_XI-1) then
- receiver = MPI_PROC_NULL
- else
- receiver = addressing(ichunk,iproc_xi + 1,iproc_eta)
- endif
- call MPI_IRECV(buffer_received_faces_scalar,npoin2D_xi(1),CUSTOM_MPI_TYPE,sender, &
- itag,MPI_COMM_WORLD,request_receive,ier)
+ ! send messages forward along each row
+ if(iproc_xi == 0) then
+ sender = MPI_PROC_NULL
+ else
+ sender = addressing(ichunk,iproc_xi - 1,iproc_eta)
+ endif
+ if(iproc_xi == NPROC_XI-1) then
+ receiver = MPI_PROC_NULL
+ else
+ receiver = addressing(ichunk,iproc_xi + 1,iproc_eta)
+ endif
+ call MPI_IRECV(buffer_received_faces_scalar,npoin2D_xi(1),CUSTOM_MPI_TYPE,sender, &
+ itag,MPI_COMM_WORLD,request_receive,ier)
- call MPI_ISEND(buffer_send_faces_scalar,npoin2D_xi(2),CUSTOM_MPI_TYPE,receiver, &
- itag2,MPI_COMM_WORLD,request_send,ier)
+ call MPI_ISEND(buffer_send_faces_scalar,npoin2D_xi(2),CUSTOM_MPI_TYPE,receiver, &
+ itag2,MPI_COMM_WORLD,request_send,ier)
- iphase = iphase + 1
- return ! exit because we have started some communications therefore we need some time
+ iphase_comm = iphase_comm + 1
+ return ! exit because we have started some communications therefore we need some time
- endif !!!!!!!!! end of iphase 1
+ endif !!!!!!!!! end of iphase_comm 1
- if(iphase == 2) then
+ if(iphase_comm == 2) then
-! call MPI_WAIT(request_send,msg_status,ier)
-! call MPI_WAIT(request_receive,msg_status,ier)
- call MPI_TEST(request_send,flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not sent yet
- call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not received yet
+ ! call MPI_WAIT(request_send,msg_status,ier)
+ ! call MPI_WAIT(request_receive,msg_status,ier)
+ call MPI_TEST(request_send,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not sent yet
+ call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not received yet
-! all slices add the buffer received to the contributions on the left face
- if(iproc_xi > 0) then
- do ipoin=1,npoin2D_xi(1)
- array_val(iboolleft_xi(ipoin)) = array_val(iboolleft_xi(ipoin)) + &
- buffer_received_faces_scalar(ipoin,1)
- enddo
- endif
+ ! all slices add the buffer received to the contributions on the left face
+ if(iproc_xi > 0) then
+ do ipoin=1,npoin2D_xi(1)
+ array_val(iboolleft_xi(ipoin)) = array_val(iboolleft_xi(ipoin)) + &
+ buffer_received_faces_scalar(ipoin,1)
+ enddo
+ endif
-! the contributions are correctly assembled on the left side of each slice
-! now we have to send the result back to the sender
-! all slices copy the left face into the buffer
- do ipoin=1,npoin2D_xi(1)
- buffer_send_faces_scalar(ipoin,1) = array_val(iboolleft_xi(ipoin))
- enddo
+ ! the contributions are correctly assembled on the left side of each slice
+ ! now we have to send the result back to the sender
+ ! all slices copy the left face into the buffer
+ do ipoin=1,npoin2D_xi(1)
+ buffer_send_faces_scalar(ipoin,1) = array_val(iboolleft_xi(ipoin))
+ enddo
-! send messages backward along each row
- if(iproc_xi == NPROC_XI-1) then
- sender = MPI_PROC_NULL
- else
- sender = addressing(ichunk,iproc_xi + 1,iproc_eta)
- endif
- if(iproc_xi == 0) then
- receiver = MPI_PROC_NULL
- else
- receiver = addressing(ichunk,iproc_xi - 1,iproc_eta)
- endif
- call MPI_IRECV(buffer_received_faces_scalar,npoin2D_xi(2),CUSTOM_MPI_TYPE,sender, &
- itag,MPI_COMM_WORLD,request_receive,ier)
+ ! send messages backward along each row
+ if(iproc_xi == NPROC_XI-1) then
+ sender = MPI_PROC_NULL
+ else
+ sender = addressing(ichunk,iproc_xi + 1,iproc_eta)
+ endif
+ if(iproc_xi == 0) then
+ receiver = MPI_PROC_NULL
+ else
+ receiver = addressing(ichunk,iproc_xi - 1,iproc_eta)
+ endif
+ call MPI_IRECV(buffer_received_faces_scalar,npoin2D_xi(2),CUSTOM_MPI_TYPE,sender, &
+ itag,MPI_COMM_WORLD,request_receive,ier)
- call MPI_ISEND(buffer_send_faces_scalar,npoin2D_xi(1),CUSTOM_MPI_TYPE,receiver, &
- itag2,MPI_COMM_WORLD,request_send,ier)
+ call MPI_ISEND(buffer_send_faces_scalar,npoin2D_xi(1),CUSTOM_MPI_TYPE,receiver, &
+ itag2,MPI_COMM_WORLD,request_send,ier)
- iphase = iphase + 1
- return ! exit because we have started some communications therefore we need some time
+ iphase_comm = iphase_comm + 1
+ return ! exit because we have started some communications therefore we need some time
- endif !!!!!!!!! end of iphase 2
+ endif !!!!!!!!! end of iphase_comm 2
- if(iphase == 3) then
+ if(iphase_comm == 3) then
-! call MPI_WAIT(request_send,msg_status,ier)
-! call MPI_WAIT(request_receive,msg_status,ier)
- call MPI_TEST(request_send,flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not sent yet
- call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not received yet
+ ! call MPI_WAIT(request_send,msg_status,ier)
+ ! call MPI_WAIT(request_receive,msg_status,ier)
+ call MPI_TEST(request_send,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not sent yet
+ call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not received yet
-! all slices copy the buffer received to the contributions on the right face
- if(iproc_xi < NPROC_XI-1) then
- do ipoin=1,npoin2D_xi(2)
- array_val(iboolright_xi(ipoin)) = buffer_received_faces_scalar(ipoin,1)
- enddo
- endif
+ ! all slices copy the buffer received to the contributions on the right face
+ if(iproc_xi < NPROC_XI-1) then
+ do ipoin=1,npoin2D_xi(2)
+ array_val(iboolright_xi(ipoin)) = buffer_received_faces_scalar(ipoin,1)
+ enddo
+ endif
-!----
-!---- then assemble along eta using the 2-D topology
-!----
+ !----
+ !---- then assemble along eta using the 2-D topology
+ !----
-! slices copy the right face into the buffer
- do ipoin=1,npoin2D_eta(2)
- buffer_send_faces_scalar(ipoin,1) = array_val(iboolright_eta(ipoin))
- enddo
+ ! slices copy the right face into the buffer
+ do ipoin=1,npoin2D_eta(2)
+ buffer_send_faces_scalar(ipoin,1) = array_val(iboolright_eta(ipoin))
+ enddo
-! send messages forward along each row
- if(iproc_eta == 0) then
- sender = MPI_PROC_NULL
- else
- sender = addressing(ichunk,iproc_xi,iproc_eta - 1)
- endif
- if(iproc_eta == NPROC_ETA-1) then
- receiver = MPI_PROC_NULL
- else
- receiver = addressing(ichunk,iproc_xi,iproc_eta + 1)
- endif
- call MPI_IRECV(buffer_received_faces_scalar,npoin2D_eta(1),CUSTOM_MPI_TYPE,sender, &
- itag,MPI_COMM_WORLD,request_receive,ier)
+ ! send messages forward along each row
+ if(iproc_eta == 0) then
+ sender = MPI_PROC_NULL
+ else
+ sender = addressing(ichunk,iproc_xi,iproc_eta - 1)
+ endif
+ if(iproc_eta == NPROC_ETA-1) then
+ receiver = MPI_PROC_NULL
+ else
+ receiver = addressing(ichunk,iproc_xi,iproc_eta + 1)
+ endif
+ call MPI_IRECV(buffer_received_faces_scalar,npoin2D_eta(1),CUSTOM_MPI_TYPE,sender, &
+ itag,MPI_COMM_WORLD,request_receive,ier)
- call MPI_ISEND(buffer_send_faces_scalar,npoin2D_eta(2),CUSTOM_MPI_TYPE,receiver, &
- itag2,MPI_COMM_WORLD,request_send,ier)
+ call MPI_ISEND(buffer_send_faces_scalar,npoin2D_eta(2),CUSTOM_MPI_TYPE,receiver, &
+ itag2,MPI_COMM_WORLD,request_send,ier)
- iphase = iphase + 1
- return ! exit because we have started some communications therefore we need some time
+ iphase_comm = iphase_comm + 1
+ return ! exit because we have started some communications therefore we need some time
- endif !!!!!!!!! end of iphase 3
+ endif !!!!!!!!! end of iphase_comm 3
- if(iphase == 4) then
+ if(iphase_comm == 4) then
-! call MPI_WAIT(request_send,msg_status,ier)
-! call MPI_WAIT(request_receive,msg_status,ier)
- call MPI_TEST(request_send,flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not sent yet
- call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not received yet
+ ! call MPI_WAIT(request_send,msg_status,ier)
+ ! call MPI_WAIT(request_receive,msg_status,ier)
+ call MPI_TEST(request_send,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not sent yet
+ call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not received yet
-! all slices add the buffer received to the contributions on the left face
- if(iproc_eta > 0) then
- do ipoin=1,npoin2D_eta(1)
- array_val(iboolleft_eta(ipoin)) = array_val(iboolleft_eta(ipoin)) + &
- buffer_received_faces_scalar(ipoin,1)
- enddo
- endif
+ ! all slices add the buffer received to the contributions on the left face
+ if(iproc_eta > 0) then
+ do ipoin=1,npoin2D_eta(1)
+ array_val(iboolleft_eta(ipoin)) = array_val(iboolleft_eta(ipoin)) + &
+ buffer_received_faces_scalar(ipoin,1)
+ enddo
+ endif
-! the contributions are correctly assembled on the left side of each slice
-! now we have to send the result back to the sender
-! all slices copy the left face into the buffer
- do ipoin=1,npoin2D_eta(1)
- buffer_send_faces_scalar(ipoin,1) = array_val(iboolleft_eta(ipoin))
- enddo
+ ! the contributions are correctly assembled on the left side of each slice
+ ! now we have to send the result back to the sender
+ ! all slices copy the left face into the buffer
+ do ipoin=1,npoin2D_eta(1)
+ buffer_send_faces_scalar(ipoin,1) = array_val(iboolleft_eta(ipoin))
+ enddo
-! send messages backward along each row
- if(iproc_eta == NPROC_ETA-1) then
- sender = MPI_PROC_NULL
- else
- sender = addressing(ichunk,iproc_xi,iproc_eta + 1)
- endif
- if(iproc_eta == 0) then
- receiver = MPI_PROC_NULL
- else
- receiver = addressing(ichunk,iproc_xi,iproc_eta - 1)
- endif
- call MPI_IRECV(buffer_received_faces_scalar,npoin2D_eta(2),CUSTOM_MPI_TYPE,sender, &
- itag,MPI_COMM_WORLD,request_receive,ier)
+ ! send messages backward along each row
+ if(iproc_eta == NPROC_ETA-1) then
+ sender = MPI_PROC_NULL
+ else
+ sender = addressing(ichunk,iproc_xi,iproc_eta + 1)
+ endif
+ if(iproc_eta == 0) then
+ receiver = MPI_PROC_NULL
+ else
+ receiver = addressing(ichunk,iproc_xi,iproc_eta - 1)
+ endif
+ call MPI_IRECV(buffer_received_faces_scalar,npoin2D_eta(2),CUSTOM_MPI_TYPE,sender, &
+ itag,MPI_COMM_WORLD,request_receive,ier)
- call MPI_ISEND(buffer_send_faces_scalar,npoin2D_eta(1),CUSTOM_MPI_TYPE,receiver, &
- itag2,MPI_COMM_WORLD,request_send,ier)
+ call MPI_ISEND(buffer_send_faces_scalar,npoin2D_eta(1),CUSTOM_MPI_TYPE,receiver, &
+ itag2,MPI_COMM_WORLD,request_send,ier)
- iphase = iphase + 1
- return ! exit because we have started some communications therefore we need some time
+ iphase_comm = iphase_comm + 1
+ return ! exit because we have started some communications therefore we need some time
- endif !!!!!!!!! end of iphase 4
+ endif !!!!!!!!! end of iphase_comm 4
- if(iphase == 5) then
+ if(iphase_comm == 5) then
-! call MPI_WAIT(request_send,msg_status,ier)
-! call MPI_WAIT(request_receive,msg_status,ier)
- call MPI_TEST(request_send,flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not sent yet
- call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not received yet
+ ! call MPI_WAIT(request_send,msg_status,ier)
+ ! call MPI_WAIT(request_receive,msg_status,ier)
+ call MPI_TEST(request_send,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not sent yet
+ call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not received yet
-! all slices copy the buffer received to the contributions on the right face
- if(iproc_eta < NPROC_ETA-1) then
- do ipoin=1,npoin2D_eta(2)
- array_val(iboolright_eta(ipoin)) = buffer_received_faces_scalar(ipoin,1)
- enddo
- endif
+ ! all slices copy the buffer received to the contributions on the right face
+ if(iproc_eta < NPROC_ETA-1) then
+ do ipoin=1,npoin2D_eta(2)
+ array_val(iboolright_eta(ipoin)) = buffer_received_faces_scalar(ipoin,1)
+ enddo
+ endif
-!----
-!---- start MPI assembling phase between chunks
-!----
+ !----
+ !---- start MPI assembling phase between chunks
+ !----
-! check flag to see if we need to assemble (might be turned off when debugging)
-! and do not assemble if only one chunk
- if (.not. ACTUALLY_ASSEMBLE_MPI_CHUNKS .or. NCHUNKS == 1) then
- iphase = 9999 ! this means everything is finished
- return
- endif
+ ! check flag to see if we need to assemble (might be turned off when debugging)
+ ! and do not assemble if only one chunk
+ if (.not. ACTUALLY_ASSEMBLE_MPI_CHUNKS .or. NCHUNKS == 1) then
+ iphase_comm = 9999 ! this means everything is finished
+ return
+ endif
-! ***************************************************************
-! transmit messages in forward direction (iprocfrom -> iprocto)
-! ***************************************************************
+ ! ***************************************************************
+ ! transmit messages in forward direction (iprocfrom -> iprocto)
+ ! ***************************************************************
-!---- put slices in receive mode
-!---- a given slice can belong to at most two faces
+ !---- put slices in receive mode
+ !---- a given slice can belong to at most two faces
- icount_faces = 0
- do imsg = 1,NUMMSGS_FACES
- if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
- if(myrank==iprocto_faces(imsg)) then
- sender = iprocfrom_faces(imsg)
- npoin2D_chunks = npoin2D_faces(icount_faces)
- call MPI_IRECV(buffer_received_faces_scalar(1,icount_faces), &
- npoin2D_chunks,CUSTOM_MPI_TYPE,sender, &
- itag,MPI_COMM_WORLD,request_receive_array(icount_faces),ier)
-! do ipoin2D=1,npoin2D_chunks
-! array_val(iboolfaces(ipoin2D,icount_faces)) = &
-! array_val(iboolfaces(ipoin2D,icount_faces)) + buffer_received_faces_scalar(ipoin2D)
-! enddo
- endif
- enddo
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocto_faces(imsg)) then
+ sender = iprocfrom_faces(imsg)
+ npoin2D_chunks = npoin2D_faces(icount_faces)
+ call MPI_IRECV(buffer_received_faces_scalar(1,icount_faces), &
+ npoin2D_chunks,CUSTOM_MPI_TYPE,sender, &
+ itag,MPI_COMM_WORLD,request_receive_array(icount_faces),ier)
+ ! do ipoin2D=1,npoin2D_chunks
+ ! array_val(iboolfaces(ipoin2D,icount_faces)) = &
+ ! array_val(iboolfaces(ipoin2D,icount_faces)) + buffer_received_faces_scalar(ipoin2D)
+ ! enddo
+ endif
+ enddo
-!---- put slices in send mode
-!---- a given slice can belong to at most two faces
- icount_faces = 0
- do imsg = 1,NUMMSGS_FACES
- if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
- if(myrank==iprocfrom_faces(imsg)) then
- receiver = iprocto_faces(imsg)
- npoin2D_chunks = npoin2D_faces(icount_faces)
- do ipoin2D=1,npoin2D_chunks
- buffer_send_faces_scalar(ipoin2D,icount_faces) = array_val(iboolfaces(ipoin2D,icount_faces))
+ !---- put slices in send mode
+ !---- a given slice can belong to at most two faces
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocfrom_faces(imsg)) then
+ receiver = iprocto_faces(imsg)
+ npoin2D_chunks = npoin2D_faces(icount_faces)
+ do ipoin2D=1,npoin2D_chunks
+ buffer_send_faces_scalar(ipoin2D,icount_faces) = array_val(iboolfaces(ipoin2D,icount_faces))
+ enddo
+ call MPI_ISEND(buffer_send_faces_scalar(1,icount_faces),npoin2D_chunks, &
+ CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,request_send_array(icount_faces),ier)
+ endif
enddo
- call MPI_ISEND(buffer_send_faces_scalar(1,icount_faces),npoin2D_chunks, &
- CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,request_send_array(icount_faces),ier)
- endif
- enddo
- iphase = iphase + 1
- return ! exit because we have started some communications therefore we need some time
+ iphase_comm = iphase_comm + 1
+ return ! exit because we have started some communications therefore we need some time
- endif !!!!!!!!! end of iphase 5
+ endif !!!!!!!!! end of iphase_comm 5
- if(iphase == 6) then
+ if(iphase_comm == 6) then
- icount_faces = 0
- do imsg = 1,NUMMSGS_FACES
- if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
- if(myrank==iprocto_faces(imsg)) then
- call MPI_TEST(request_receive_array(icount_faces),flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not received yet
- endif
- enddo
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocto_faces(imsg)) then
+ call MPI_TEST(request_receive_array(icount_faces),flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not received yet
+ endif
+ enddo
- icount_faces = 0
- do imsg = 1,NUMMSGS_FACES
- if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
- if(myrank==iprocfrom_faces(imsg)) then
- call MPI_TEST(request_send_array(icount_faces),flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not sent yet
- endif
- enddo
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocfrom_faces(imsg)) then
+ call MPI_TEST(request_send_array(icount_faces),flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not sent yet
+ endif
+ enddo
- icount_faces = 0
- do imsg = 1,NUMMSGS_FACES
- if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
- if(myrank==iprocto_faces(imsg)) then
- do ipoin2D=1,npoin2D_faces(icount_faces)
- array_val(iboolfaces(ipoin2D,icount_faces)) = &
- array_val(iboolfaces(ipoin2D,icount_faces)) + buffer_received_faces_scalar(ipoin2D,icount_faces)
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocto_faces(imsg)) then
+ do ipoin2D=1,npoin2D_faces(icount_faces)
+ array_val(iboolfaces(ipoin2D,icount_faces)) = &
+ array_val(iboolfaces(ipoin2D,icount_faces)) + buffer_received_faces_scalar(ipoin2D,icount_faces)
+ enddo
+ endif
enddo
- endif
- enddo
-! *********************************************************************
-! transmit messages back in opposite direction (iprocto -> iprocfrom)
-! *********************************************************************
+ ! *********************************************************************
+ ! transmit messages back in opposite direction (iprocto -> iprocfrom)
+ ! *********************************************************************
-!---- put slices in receive mode
-!---- a given slice can belong to at most two faces
+ !---- put slices in receive mode
+ !---- a given slice can belong to at most two faces
- icount_faces = 0
- do imsg = 1,NUMMSGS_FACES
- if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
- if(myrank==iprocfrom_faces(imsg)) then
- sender = iprocto_faces(imsg)
- npoin2D_chunks = npoin2D_faces(icount_faces)
- call MPI_IRECV(buffer_received_faces_scalar(1,icount_faces), &
- npoin2D_chunks,CUSTOM_MPI_TYPE,sender, &
- itag,MPI_COMM_WORLD,request_receive_array(icount_faces),ier)
-! do ipoin2D=1,npoin2D_chunks
-! array_val(iboolfaces(ipoin2D,icount_faces)) = buffer_received_faces_scalar(ipoin2D)
-! enddo
- endif
- enddo
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocfrom_faces(imsg)) then
+ sender = iprocto_faces(imsg)
+ npoin2D_chunks = npoin2D_faces(icount_faces)
+ call MPI_IRECV(buffer_received_faces_scalar(1,icount_faces), &
+ npoin2D_chunks,CUSTOM_MPI_TYPE,sender, &
+ itag,MPI_COMM_WORLD,request_receive_array(icount_faces),ier)
+ ! do ipoin2D=1,npoin2D_chunks
+ ! array_val(iboolfaces(ipoin2D,icount_faces)) = buffer_received_faces_scalar(ipoin2D)
+ ! enddo
+ endif
+ enddo
-!---- put slices in send mode
-!---- a given slice can belong to at most two faces
- icount_faces = 0
- do imsg = 1,NUMMSGS_FACES
- if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
- if(myrank==iprocto_faces(imsg)) then
- receiver = iprocfrom_faces(imsg)
- npoin2D_chunks = npoin2D_faces(icount_faces)
- do ipoin2D=1,npoin2D_chunks
- buffer_send_faces_scalar(ipoin2D,icount_faces) = array_val(iboolfaces(ipoin2D,icount_faces))
+ !---- put slices in send mode
+ !---- a given slice can belong to at most two faces
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocto_faces(imsg)) then
+ receiver = iprocfrom_faces(imsg)
+ npoin2D_chunks = npoin2D_faces(icount_faces)
+ do ipoin2D=1,npoin2D_chunks
+ buffer_send_faces_scalar(ipoin2D,icount_faces) = array_val(iboolfaces(ipoin2D,icount_faces))
+ enddo
+ call MPI_ISEND(buffer_send_faces_scalar(1,icount_faces),npoin2D_chunks, &
+ CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,request_send_array(icount_faces),ier)
+ endif
enddo
- call MPI_ISEND(buffer_send_faces_scalar(1,icount_faces),npoin2D_chunks, &
- CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,request_send_array(icount_faces),ier)
- endif
- enddo
- iphase = iphase + 1
- return ! exit because we have started some communications therefore we need some time
+ iphase_comm = iphase_comm + 1
+ return ! exit because we have started some communications therefore we need some time
- endif !!!!!!!!! end of iphase 6
+ endif !!!!!!!!! end of iphase_comm 6
- if(iphase == 7) then
+ if(iphase_comm == 7) then
- icount_faces = 0
- do imsg = 1,NUMMSGS_FACES
- if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
- if(myrank==iprocto_faces(imsg)) then
- call MPI_TEST(request_send_array(icount_faces),flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not received yet
- endif
- enddo
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocto_faces(imsg)) then
+ call MPI_TEST(request_send_array(icount_faces),flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not received yet
+ endif
+ enddo
- icount_faces = 0
- do imsg = 1,NUMMSGS_FACES
- if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
- if(myrank==iprocfrom_faces(imsg)) then
- call MPI_TEST(request_receive_array(icount_faces),flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not sent yet
- endif
- enddo
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocfrom_faces(imsg)) then
+ call MPI_TEST(request_receive_array(icount_faces),flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not sent yet
+ endif
+ enddo
- icount_faces = 0
- do imsg = 1,NUMMSGS_FACES
- if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
- if(myrank==iprocfrom_faces(imsg)) then
- do ipoin2D=1,npoin2D_faces(icount_faces)
- array_val(iboolfaces(ipoin2D,icount_faces)) = buffer_received_faces_scalar(ipoin2D,icount_faces)
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocfrom_faces(imsg)) then
+ do ipoin2D=1,npoin2D_faces(icount_faces)
+ array_val(iboolfaces(ipoin2D,icount_faces)) = buffer_received_faces_scalar(ipoin2D,icount_faces)
+ enddo
+ endif
enddo
- endif
- enddo
-! this is the exit condition, to go beyond the last phase number
- iphase = iphase + 1
+ ! this is the exit condition, to go beyond the last phase number
+ iphase_comm = iphase_comm + 1
-!! DK DK do the rest in blocking for now, for simplicity
+ !! DK DK do the rest in blocking for now, for simplicity
-!----
-!---- start MPI assembling corners
-!----
+ !----
+ !---- start MPI assembling corners
+ !----
-! scheme for corners cannot deadlock even if NPROC_XI = NPROC_ETA = 1
+ ! scheme for corners cannot deadlock even if NPROC_XI = NPROC_ETA = 1
-! ***************************************************************
-! transmit messages in forward direction (two workers -> master)
-! ***************************************************************
+ ! ***************************************************************
+ ! transmit messages in forward direction (two workers -> master)
+ ! ***************************************************************
- icount_corners = 0
+ icount_corners = 0
- do imsg = 1,NCORNERSCHUNKS
+ do imsg = 1,NCORNERSCHUNKS
- if(myrank == iproc_master_corners(imsg) .or. &
- myrank == iproc_worker1_corners(imsg) .or. &
- (NCHUNKS /= 2 .and. myrank == iproc_worker2_corners(imsg))) icount_corners = icount_corners + 1
+ if(myrank == iproc_master_corners(imsg) .or. &
+ myrank == iproc_worker1_corners(imsg) .or. &
+ (NCHUNKS /= 2 .and. myrank == iproc_worker2_corners(imsg))) icount_corners = icount_corners + 1
-!---- receive messages from the two workers on the master
- if(myrank==iproc_master_corners(imsg)) then
+ !---- receive messages from the two workers on the master
+ if(myrank==iproc_master_corners(imsg)) then
-! receive from worker #1 and add to local array
- sender = iproc_worker1_corners(imsg)
- call MPI_RECV(buffer_recv_chunkcorn_scalar,NGLOB1D_RADIAL, &
- CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
- do ipoin1D=1,NGLOB1D_RADIAL
- array_val(iboolcorner(ipoin1D,icount_corners)) = array_val(iboolcorner(ipoin1D,icount_corners)) + &
- buffer_recv_chunkcorn_scalar(ipoin1D)
+ ! receive from worker #1 and add to local array
+ sender = iproc_worker1_corners(imsg)
+ call MPI_RECV(buffer_recv_chunkcorn_scalar,NGLOB1D_RADIAL, &
+ CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
+ do ipoin1D=1,NGLOB1D_RADIAL
+ array_val(iboolcorner(ipoin1D,icount_corners)) = array_val(iboolcorner(ipoin1D,icount_corners)) + &
+ buffer_recv_chunkcorn_scalar(ipoin1D)
+ enddo
+
+ ! receive from worker #2 and add to local array
+ if(NCHUNKS /= 2) then
+ sender = iproc_worker2_corners(imsg)
+ call MPI_RECV(buffer_recv_chunkcorn_scalar,NGLOB1D_RADIAL, &
+ CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
+ do ipoin1D=1,NGLOB1D_RADIAL
+ array_val(iboolcorner(ipoin1D,icount_corners)) = array_val(iboolcorner(ipoin1D,icount_corners)) + &
+ buffer_recv_chunkcorn_scalar(ipoin1D)
+ enddo
+ endif
+
+ endif
+
+ !---- send messages from the two workers to the master
+ if(myrank==iproc_worker1_corners(imsg) .or. &
+ (NCHUNKS /= 2 .and. myrank==iproc_worker2_corners(imsg))) then
+
+ receiver = iproc_master_corners(imsg)
+ do ipoin1D=1,NGLOB1D_RADIAL
+ buffer_send_chunkcorn_scalar(ipoin1D) = array_val(iboolcorner(ipoin1D,icount_corners))
+ enddo
+ call MPI_SEND(buffer_send_chunkcorn_scalar,NGLOB1D_RADIAL,CUSTOM_MPI_TYPE, &
+ receiver,itag,MPI_COMM_WORLD,ier)
+
+ endif
+
+ ! *********************************************************************
+ ! transmit messages back in opposite direction (master -> two workers)
+ ! *********************************************************************
+
+ !---- receive messages from the master on the two workers
+ if(myrank==iproc_worker1_corners(imsg) .or. &
+ (NCHUNKS /= 2 .and. myrank==iproc_worker2_corners(imsg))) then
+
+ ! receive from master and copy to local array
+ sender = iproc_master_corners(imsg)
+ call MPI_RECV(buffer_recv_chunkcorn_scalar,NGLOB1D_RADIAL, &
+ CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
+ do ipoin1D=1,NGLOB1D_RADIAL
+ array_val(iboolcorner(ipoin1D,icount_corners)) = buffer_recv_chunkcorn_scalar(ipoin1D)
+ enddo
+
+ endif
+
+ !---- send messages from the master to the two workers
+ if(myrank==iproc_master_corners(imsg)) then
+
+ do ipoin1D=1,NGLOB1D_RADIAL
+ buffer_send_chunkcorn_scalar(ipoin1D) = array_val(iboolcorner(ipoin1D,icount_corners))
+ enddo
+
+ ! send to worker #1
+ receiver = iproc_worker1_corners(imsg)
+ call MPI_SEND(buffer_send_chunkcorn_scalar,NGLOB1D_RADIAL,CUSTOM_MPI_TYPE, &
+ receiver,itag,MPI_COMM_WORLD,ier)
+
+ ! send to worker #2
+ if(NCHUNKS /= 2) then
+ receiver = iproc_worker2_corners(imsg)
+ call MPI_SEND(buffer_send_chunkcorn_scalar,NGLOB1D_RADIAL,CUSTOM_MPI_TYPE, &
+ receiver,itag,MPI_COMM_WORLD,ier)
+ endif
+
+ endif
+
enddo
-! receive from worker #2 and add to local array
- if(NCHUNKS /= 2) then
- sender = iproc_worker2_corners(imsg)
- call MPI_RECV(buffer_recv_chunkcorn_scalar,NGLOB1D_RADIAL, &
- CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
- do ipoin1D=1,NGLOB1D_RADIAL
- array_val(iboolcorner(ipoin1D,icount_corners)) = array_val(iboolcorner(ipoin1D,icount_corners)) + &
- buffer_recv_chunkcorn_scalar(ipoin1D)
+ endif !!!!!!!!! end of iphase_comm 7
+
+ end subroutine assemble_MPI_scalar
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!-------------------------------------------------------------------------------------------------
+!-------------------------------------------------------------------------------------------------
+!
+
+! daniel: TODO - still local versions
+
+
+ subroutine assemble_MPI_scalar_ext_mesh_s(NPROC,NGLOB_AB,array_val, &
+ buffer_send_scalar_ext_mesh,buffer_recv_scalar_ext_mesh, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ my_neighbours_ext_mesh, &
+ request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh)
+
+! non-blocking MPI send
+
+ implicit none
+
+ include "constants.h"
+
+ integer :: NPROC
+ integer :: NGLOB_AB
+ integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
+
+! array to send
+ real(kind=CUSTOM_REAL), dimension(NGLOB_AB) :: array_val
+
+
+ real(kind=CUSTOM_REAL), dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: &
+ buffer_send_scalar_ext_mesh,buffer_recv_scalar_ext_mesh
+
+ integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh,my_neighbours_ext_mesh
+ integer, dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
+ integer, dimension(num_interfaces_ext_mesh) :: request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh
+
+ integer ipoin,iinterface
+
+! sends only if more than one partition
+ if(NPROC > 1) then
+
+ ! partition border copy into the buffer
+ do iinterface = 1, num_interfaces_ext_mesh
+ do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
+ buffer_send_scalar_ext_mesh(ipoin,iinterface) = &
+ array_val(ibool_interfaces_ext_mesh(ipoin,iinterface))
+ enddo
enddo
- endif
+ ! send messages
+ do iinterface = 1, num_interfaces_ext_mesh
+ ! non-blocking synchronous send request
+ call isend_cr(buffer_send_scalar_ext_mesh(1:nibool_interfaces_ext_mesh(iinterface),iinterface), &
+ nibool_interfaces_ext_mesh(iinterface), &
+ my_neighbours_ext_mesh(iinterface), &
+ itag, &
+ request_send_scalar_ext_mesh(iinterface) &
+ )
+ ! receive request
+ call irecv_cr(buffer_recv_scalar_ext_mesh(1:nibool_interfaces_ext_mesh(iinterface),iinterface), &
+ nibool_interfaces_ext_mesh(iinterface), &
+ my_neighbours_ext_mesh(iinterface), &
+ itag, &
+ request_recv_scalar_ext_mesh(iinterface) &
+ )
+
+ enddo
+
endif
-!---- send messages from the two workers to the master
- if(myrank==iproc_worker1_corners(imsg) .or. &
- (NCHUNKS /= 2 .and. myrank==iproc_worker2_corners(imsg))) then
+ end subroutine assemble_MPI_scalar_ext_mesh_s
- receiver = iproc_master_corners(imsg)
- do ipoin1D=1,NGLOB1D_RADIAL
- buffer_send_chunkcorn_scalar(ipoin1D) = array_val(iboolcorner(ipoin1D,icount_corners))
+!
+!----
+!
+
+ subroutine assemble_MPI_scalar_ext_mesh_w(NPROC,NGLOB_AB,array_val, &
+ buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh, &
+ max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh)
+
+! waits for send/receiver to be completed and assembles contributions
+
+ implicit none
+
+ include "constants.h"
+
+ integer :: NPROC
+ integer :: NGLOB_AB
+ integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
+! array to assemble
+ real(kind=CUSTOM_REAL), dimension(NGLOB_AB) :: array_val
+
+
+ real(kind=CUSTOM_REAL), dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: &
+ buffer_recv_scalar_ext_mesh
+
+ integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh
+ integer, dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
+ integer, dimension(num_interfaces_ext_mesh) :: request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh
+
+ integer ipoin,iinterface
+
+! assemble only if more than one partition
+ if(NPROC > 1) then
+
+ ! wait for communications completion (recv)
+ do iinterface = 1, num_interfaces_ext_mesh
+ call wait_req(request_recv_scalar_ext_mesh(iinterface))
enddo
- call MPI_SEND(buffer_send_chunkcorn_scalar,NGLOB1D_RADIAL,CUSTOM_MPI_TYPE, &
- receiver,itag,MPI_COMM_WORLD,ier)
+ ! adding contributions of neighbours
+ do iinterface = 1, num_interfaces_ext_mesh
+ do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
+ array_val(ibool_interfaces_ext_mesh(ipoin,iinterface)) = &
+ array_val(ibool_interfaces_ext_mesh(ipoin,iinterface)) &
+ + buffer_recv_scalar_ext_mesh(ipoin,iinterface)
+ enddo
+ enddo
+
+ ! wait for communications completion (send)
+ do iinterface = 1, num_interfaces_ext_mesh
+ call wait_req(request_send_scalar_ext_mesh(iinterface))
+ enddo
+
endif
-! *********************************************************************
-! transmit messages back in opposite direction (master -> two workers)
-! *********************************************************************
+ end subroutine assemble_MPI_scalar_ext_mesh_w
-!---- receive messages from the master on the two workers
- if(myrank==iproc_worker1_corners(imsg) .or. &
- (NCHUNKS /= 2 .and. myrank==iproc_worker2_corners(imsg))) then
-! receive from master and copy to local array
- sender = iproc_master_corners(imsg)
- call MPI_RECV(buffer_recv_chunkcorn_scalar,NGLOB1D_RADIAL, &
- CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
- do ipoin1D=1,NGLOB1D_RADIAL
- array_val(iboolcorner(ipoin1D,icount_corners)) = buffer_recv_chunkcorn_scalar(ipoin1D)
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+!daniel: TODO - cuda scalar assembly...
+
+ subroutine assemble_MPI_scalar_send_cuda(NPROC, &
+ buffer_send_scalar_ext_mesh,buffer_recv_scalar_ext_mesh, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh, &
+ my_neighbours_ext_mesh, &
+ request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh, &
+ FORWARD_OR_ADJOINT)
+
+! non-blocking MPI send
+
+ ! sends data
+ ! note: assembling data already filled into buffer_send_scalar_ext_mesh array
+
+ use constants
+ use specfem_par,only: Mesh_pointer
+
+ implicit none
+
+ integer :: NPROC
+ integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
+
+ real(kind=CUSTOM_REAL), dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: &
+ buffer_send_scalar_ext_mesh,buffer_recv_scalar_ext_mesh
+
+ integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh,my_neighbours_ext_mesh
+ integer, dimension(num_interfaces_ext_mesh) :: request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh
+
+ integer :: FORWARD_OR_ADJOINT
+
+ ! local parameters
+ integer iinterface
+
+! sends only if more than one partition
+ if(NPROC > 1) then
+
+ ! preparation of the contribution between partitions using MPI
+ ! transfers mpi buffers to CPU
+ call transfer_boun_pot_from_device(Mesh_pointer, &
+ buffer_send_scalar_ext_mesh, &
+ FORWARD_OR_ADJOINT)
+
+ ! send messages
+ do iinterface = 1, num_interfaces_ext_mesh
+ ! non-blocking synchronous send request
+ call isend_cr(buffer_send_scalar_ext_mesh(1:nibool_interfaces_ext_mesh(iinterface),iinterface), &
+ nibool_interfaces_ext_mesh(iinterface), &
+ my_neighbours_ext_mesh(iinterface), &
+ itag, &
+ request_send_scalar_ext_mesh(iinterface) &
+ )
+ ! receive request
+ call irecv_cr(buffer_recv_scalar_ext_mesh(1:nibool_interfaces_ext_mesh(iinterface),iinterface), &
+ nibool_interfaces_ext_mesh(iinterface), &
+ my_neighbours_ext_mesh(iinterface), &
+ itag, &
+ request_recv_scalar_ext_mesh(iinterface) &
+ )
+
enddo
endif
-!---- send messages from the master to the two workers
- if(myrank==iproc_master_corners(imsg)) then
+ end subroutine assemble_MPI_scalar_send_cuda
- do ipoin1D=1,NGLOB1D_RADIAL
- buffer_send_chunkcorn_scalar(ipoin1D) = array_val(iboolcorner(ipoin1D,icount_corners))
- enddo
+!
+!-------------------------------------------------------------------------------------------------
+!
-! send to worker #1
- receiver = iproc_worker1_corners(imsg)
- call MPI_SEND(buffer_send_chunkcorn_scalar,NGLOB1D_RADIAL,CUSTOM_MPI_TYPE, &
- receiver,itag,MPI_COMM_WORLD,ier)
+ subroutine assemble_MPI_scalar_write_cuda(NPROC, &
+ buffer_recv_scalar_ext_mesh, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh, &
+ FORWARD_OR_ADJOINT)
-! send to worker #2
- if(NCHUNKS /= 2) then
- receiver = iproc_worker2_corners(imsg)
- call MPI_SEND(buffer_send_chunkcorn_scalar,NGLOB1D_RADIAL,CUSTOM_MPI_TYPE, &
- receiver,itag,MPI_COMM_WORLD,ier)
- endif
+! waits for send/receiver to be completed and assembles contributions
- endif
+ use constants
+ use specfem_par,only: Mesh_pointer
+
+ implicit none
- enddo
+ integer :: NPROC
- endif !!!!!!!!! end of iphase 7
+ integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
- end subroutine assemble_MPI_scalar
+ real(kind=CUSTOM_REAL), dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: &
+ buffer_recv_scalar_ext_mesh
+ integer, dimension(num_interfaces_ext_mesh) :: request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh
+ integer :: FORWARD_OR_ADJOINT
+
+ ! local parameters
+ integer :: iinterface
+
+! assemble only if more than one partition
+ if(NPROC > 1) then
+
+ ! wait for communications completion (recv)
+ do iinterface = 1, num_interfaces_ext_mesh
+ call wait_req(request_recv_scalar_ext_mesh(iinterface))
+ enddo
+
+ ! adding contributions of neighbours
+ call transfer_asmbl_pot_to_device(Mesh_pointer, &
+ buffer_recv_scalar_ext_mesh, &
+ FORWARD_OR_ADJOINT)
+
+ ! note: adding contributions of neighbours has been done just above for cuda
+ !do iinterface = 1, num_interfaces_ext_mesh
+ ! do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
+ ! array_val(ibool_interfaces_ext_mesh(ipoin,iinterface)) = &
+ ! array_val(ibool_interfaces_ext_mesh(ipoin,iinterface)) &
+ ! + buffer_recv_scalar_ext_mesh(ipoin,iinterface)
+ ! enddo
+ !enddo
+
+ ! wait for communications completion (send)
+ do iinterface = 1, num_interfaces_ext_mesh
+ call wait_req(request_send_scalar_ext_mesh(iinterface))
+ enddo
+
+ endif
+
+ end subroutine assemble_MPI_scalar_write_cuda
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_scalar_block.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_scalar_block.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_scalar_block.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -117,62 +117,62 @@
! assemble along xi only if more than one slice
if(NPROC_XI > 1) then
-! slices copy the right face into the buffer
- do ipoin=1,npoin2D_xi(2)
- buffer_send_faces_scalar(ipoin) = array_val(iboolright_xi(ipoin))
- enddo
+ ! slices copy the right face into the buffer
+ do ipoin=1,npoin2D_xi(2)
+ buffer_send_faces_scalar(ipoin) = array_val(iboolright_xi(ipoin))
+ enddo
-! send messages forward along each row
- if(iproc_xi == 0) then
- sender = MPI_PROC_NULL
- else
- sender = addressing(ichunk,iproc_xi - 1,iproc_eta)
- endif
- if(iproc_xi == NPROC_XI-1) then
- receiver = MPI_PROC_NULL
- else
- receiver = addressing(ichunk,iproc_xi + 1,iproc_eta)
- endif
- call MPI_SENDRECV(buffer_send_faces_scalar,npoin2D_xi(2),CUSTOM_MPI_TYPE,receiver, &
- itag2,buffer_received_faces_scalar,npoin2D_xi(1),CUSTOM_MPI_TYPE,sender, &
- itag,MPI_COMM_WORLD,msg_status,ier)
+ ! send messages forward along each row
+ if(iproc_xi == 0) then
+ sender = MPI_PROC_NULL
+ else
+ sender = addressing(ichunk,iproc_xi - 1,iproc_eta)
+ endif
+ if(iproc_xi == NPROC_XI-1) then
+ receiver = MPI_PROC_NULL
+ else
+ receiver = addressing(ichunk,iproc_xi + 1,iproc_eta)
+ endif
+ call MPI_SENDRECV(buffer_send_faces_scalar,npoin2D_xi(2),CUSTOM_MPI_TYPE,receiver, &
+ itag2,buffer_received_faces_scalar,npoin2D_xi(1),CUSTOM_MPI_TYPE,sender, &
+ itag,MPI_COMM_WORLD,msg_status,ier)
-! all slices add the buffer received to the contributions on the left face
- if(iproc_xi > 0) then
- do ipoin=1,npoin2D_xi(1)
- array_val(iboolleft_xi(ipoin)) = array_val(iboolleft_xi(ipoin)) + &
- buffer_received_faces_scalar(ipoin)
- enddo
- endif
+ ! all slices add the buffer received to the contributions on the left face
+ if(iproc_xi > 0) then
+ do ipoin=1,npoin2D_xi(1)
+ array_val(iboolleft_xi(ipoin)) = array_val(iboolleft_xi(ipoin)) + &
+ buffer_received_faces_scalar(ipoin)
+ enddo
+ endif
-! the contributions are correctly assembled on the left side of each slice
-! now we have to send the result back to the sender
-! all slices copy the left face into the buffer
- do ipoin=1,npoin2D_xi(1)
- buffer_send_faces_scalar(ipoin) = array_val(iboolleft_xi(ipoin))
- enddo
+ ! the contributions are correctly assembled on the left side of each slice
+ ! now we have to send the result back to the sender
+ ! all slices copy the left face into the buffer
+ do ipoin=1,npoin2D_xi(1)
+ buffer_send_faces_scalar(ipoin) = array_val(iboolleft_xi(ipoin))
+ enddo
-! send messages backward along each row
- if(iproc_xi == NPROC_XI-1) then
- sender = MPI_PROC_NULL
- else
- sender = addressing(ichunk,iproc_xi + 1,iproc_eta)
- endif
- if(iproc_xi == 0) then
- receiver = MPI_PROC_NULL
- else
- receiver = addressing(ichunk,iproc_xi - 1,iproc_eta)
- endif
- call MPI_SENDRECV(buffer_send_faces_scalar,npoin2D_xi(1),CUSTOM_MPI_TYPE,receiver, &
- itag2,buffer_received_faces_scalar,npoin2D_xi(2),CUSTOM_MPI_TYPE,sender, &
- itag,MPI_COMM_WORLD,msg_status,ier)
+ ! send messages backward along each row
+ if(iproc_xi == NPROC_XI-1) then
+ sender = MPI_PROC_NULL
+ else
+ sender = addressing(ichunk,iproc_xi + 1,iproc_eta)
+ endif
+ if(iproc_xi == 0) then
+ receiver = MPI_PROC_NULL
+ else
+ receiver = addressing(ichunk,iproc_xi - 1,iproc_eta)
+ endif
+ call MPI_SENDRECV(buffer_send_faces_scalar,npoin2D_xi(1),CUSTOM_MPI_TYPE,receiver, &
+ itag2,buffer_received_faces_scalar,npoin2D_xi(2),CUSTOM_MPI_TYPE,sender, &
+ itag,MPI_COMM_WORLD,msg_status,ier)
-! all slices copy the buffer received to the contributions on the right face
- if(iproc_xi < NPROC_XI-1) then
- do ipoin=1,npoin2D_xi(2)
- array_val(iboolright_xi(ipoin)) = buffer_received_faces_scalar(ipoin)
- enddo
- endif
+ ! all slices copy the buffer received to the contributions on the right face
+ if(iproc_xi < NPROC_XI-1) then
+ do ipoin=1,npoin2D_xi(2)
+ array_val(iboolright_xi(ipoin)) = buffer_received_faces_scalar(ipoin)
+ enddo
+ endif
endif
@@ -183,62 +183,62 @@
! assemble along eta only if more than one slice
if(NPROC_ETA > 1) then
-! slices copy the right face into the buffer
- do ipoin=1,npoin2D_eta(2)
- buffer_send_faces_scalar(ipoin) = array_val(iboolright_eta(ipoin))
- enddo
+ ! slices copy the right face into the buffer
+ do ipoin=1,npoin2D_eta(2)
+ buffer_send_faces_scalar(ipoin) = array_val(iboolright_eta(ipoin))
+ enddo
-! send messages forward along each row
- if(iproc_eta == 0) then
- sender = MPI_PROC_NULL
- else
- sender = addressing(ichunk,iproc_xi,iproc_eta - 1)
- endif
- if(iproc_eta == NPROC_ETA-1) then
- receiver = MPI_PROC_NULL
- else
- receiver = addressing(ichunk,iproc_xi,iproc_eta + 1)
- endif
- call MPI_SENDRECV(buffer_send_faces_scalar,npoin2D_eta(2),CUSTOM_MPI_TYPE,receiver, &
- itag2,buffer_received_faces_scalar,npoin2D_eta(1),CUSTOM_MPI_TYPE,sender, &
- itag,MPI_COMM_WORLD,msg_status,ier)
+ ! send messages forward along each row
+ if(iproc_eta == 0) then
+ sender = MPI_PROC_NULL
+ else
+ sender = addressing(ichunk,iproc_xi,iproc_eta - 1)
+ endif
+ if(iproc_eta == NPROC_ETA-1) then
+ receiver = MPI_PROC_NULL
+ else
+ receiver = addressing(ichunk,iproc_xi,iproc_eta + 1)
+ endif
+ call MPI_SENDRECV(buffer_send_faces_scalar,npoin2D_eta(2),CUSTOM_MPI_TYPE,receiver, &
+ itag2,buffer_received_faces_scalar,npoin2D_eta(1),CUSTOM_MPI_TYPE,sender, &
+ itag,MPI_COMM_WORLD,msg_status,ier)
-! all slices add the buffer received to the contributions on the left face
- if(iproc_eta > 0) then
- do ipoin=1,npoin2D_eta(1)
- array_val(iboolleft_eta(ipoin)) = array_val(iboolleft_eta(ipoin)) + &
- buffer_received_faces_scalar(ipoin)
- enddo
- endif
+ ! all slices add the buffer received to the contributions on the left face
+ if(iproc_eta > 0) then
+ do ipoin=1,npoin2D_eta(1)
+ array_val(iboolleft_eta(ipoin)) = array_val(iboolleft_eta(ipoin)) + &
+ buffer_received_faces_scalar(ipoin)
+ enddo
+ endif
-! the contributions are correctly assembled on the left side of each slice
-! now we have to send the result back to the sender
-! all slices copy the left face into the buffer
- do ipoin=1,npoin2D_eta(1)
- buffer_send_faces_scalar(ipoin) = array_val(iboolleft_eta(ipoin))
- enddo
+ ! the contributions are correctly assembled on the left side of each slice
+ ! now we have to send the result back to the sender
+ ! all slices copy the left face into the buffer
+ do ipoin=1,npoin2D_eta(1)
+ buffer_send_faces_scalar(ipoin) = array_val(iboolleft_eta(ipoin))
+ enddo
-! send messages backward along each row
- if(iproc_eta == NPROC_ETA-1) then
- sender = MPI_PROC_NULL
- else
- sender = addressing(ichunk,iproc_xi,iproc_eta + 1)
- endif
- if(iproc_eta == 0) then
- receiver = MPI_PROC_NULL
- else
- receiver = addressing(ichunk,iproc_xi,iproc_eta - 1)
- endif
- call MPI_SENDRECV(buffer_send_faces_scalar,npoin2D_eta(1),CUSTOM_MPI_TYPE,receiver, &
- itag2,buffer_received_faces_scalar,npoin2D_eta(2),CUSTOM_MPI_TYPE,sender, &
- itag,MPI_COMM_WORLD,msg_status,ier)
+ ! send messages backward along each row
+ if(iproc_eta == NPROC_ETA-1) then
+ sender = MPI_PROC_NULL
+ else
+ sender = addressing(ichunk,iproc_xi,iproc_eta + 1)
+ endif
+ if(iproc_eta == 0) then
+ receiver = MPI_PROC_NULL
+ else
+ receiver = addressing(ichunk,iproc_xi,iproc_eta - 1)
+ endif
+ call MPI_SENDRECV(buffer_send_faces_scalar,npoin2D_eta(1),CUSTOM_MPI_TYPE,receiver, &
+ itag2,buffer_received_faces_scalar,npoin2D_eta(2),CUSTOM_MPI_TYPE,sender, &
+ itag,MPI_COMM_WORLD,msg_status,ier)
-! all slices copy the buffer received to the contributions on the right face
- if(iproc_eta < NPROC_ETA-1) then
- do ipoin=1,npoin2D_eta(2)
- array_val(iboolright_eta(ipoin)) = buffer_received_faces_scalar(ipoin)
- enddo
- endif
+ ! all slices copy the buffer received to the contributions on the right face
+ if(iproc_eta < NPROC_ETA-1) then
+ do ipoin=1,npoin2D_eta(2)
+ array_val(iboolright_eta(ipoin)) = buffer_received_faces_scalar(ipoin)
+ enddo
+ endif
endif
@@ -261,79 +261,79 @@
! scheme for faces cannot deadlock even if NPROC_XI = NPROC_ETA = 1
do imsg_loop = 1,NUM_MSG_TYPES
- icount_faces = 0
- do imsg = 1,NUMMSGS_FACES
- if(myrank==iprocfrom_faces(imsg) .or. &
- myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
- if(myrank==iprocto_faces(imsg) .and. imsg_type(imsg) == imsg_loop) then
- sender = iprocfrom_faces(imsg)
- npoin2D_chunks = npoin2D_faces(icount_faces)
- call MPI_RECV(buffer_received_faces_scalar, &
- npoin2D_chunks,CUSTOM_MPI_TYPE,sender, &
- itag,MPI_COMM_WORLD,msg_status,ier)
- do ipoin2D=1,npoin2D_chunks
- array_val(iboolfaces(ipoin2D,icount_faces)) = &
- array_val(iboolfaces(ipoin2D,icount_faces)) + buffer_received_faces_scalar(ipoin2D)
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. &
+ myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocto_faces(imsg) .and. imsg_type(imsg) == imsg_loop) then
+ sender = iprocfrom_faces(imsg)
+ npoin2D_chunks = npoin2D_faces(icount_faces)
+ call MPI_RECV(buffer_received_faces_scalar, &
+ npoin2D_chunks,CUSTOM_MPI_TYPE,sender, &
+ itag,MPI_COMM_WORLD,msg_status,ier)
+ do ipoin2D=1,npoin2D_chunks
+ array_val(iboolfaces(ipoin2D,icount_faces)) = &
+ array_val(iboolfaces(ipoin2D,icount_faces)) + buffer_received_faces_scalar(ipoin2D)
+ enddo
+ endif
enddo
- endif
- enddo
-!---- put slices in send mode
-!---- a given slice can belong to at most two faces
- icount_faces = 0
- do imsg = 1,NUMMSGS_FACES
- if(myrank==iprocfrom_faces(imsg) .or. &
- myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
- if(myrank==iprocfrom_faces(imsg) .and. imsg_type(imsg) == imsg_loop) then
- receiver = iprocto_faces(imsg)
- npoin2D_chunks = npoin2D_faces(icount_faces)
- do ipoin2D=1,npoin2D_chunks
- buffer_send_faces_scalar(ipoin2D) = array_val(iboolfaces(ipoin2D,icount_faces))
+ !---- put slices in send mode
+ !---- a given slice can belong to at most two faces
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. &
+ myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocfrom_faces(imsg) .and. imsg_type(imsg) == imsg_loop) then
+ receiver = iprocto_faces(imsg)
+ npoin2D_chunks = npoin2D_faces(icount_faces)
+ do ipoin2D=1,npoin2D_chunks
+ buffer_send_faces_scalar(ipoin2D) = array_val(iboolfaces(ipoin2D,icount_faces))
+ enddo
+ call MPI_SEND(buffer_send_faces_scalar,npoin2D_chunks, &
+ CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,ier)
+ endif
enddo
- call MPI_SEND(buffer_send_faces_scalar,npoin2D_chunks, &
- CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,ier)
- endif
- enddo
-! *********************************************************************
-! transmit messages back in opposite direction (iprocto -> iprocfrom)
-! *********************************************************************
+ ! *********************************************************************
+ ! transmit messages back in opposite direction (iprocto -> iprocfrom)
+ ! *********************************************************************
-!---- put slices in receive mode
-!---- a given slice can belong to at most two faces
+ !---- put slices in receive mode
+ !---- a given slice can belong to at most two faces
- icount_faces = 0
- do imsg = 1,NUMMSGS_FACES
- if(myrank==iprocfrom_faces(imsg) .or. &
- myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
- if(myrank==iprocfrom_faces(imsg) .and. imsg_type(imsg) == imsg_loop) then
- sender = iprocto_faces(imsg)
- npoin2D_chunks = npoin2D_faces(icount_faces)
- call MPI_RECV(buffer_received_faces_scalar, &
- npoin2D_chunks,CUSTOM_MPI_TYPE,sender, &
- itag,MPI_COMM_WORLD,msg_status,ier)
- do ipoin2D=1,npoin2D_chunks
- array_val(iboolfaces(ipoin2D,icount_faces)) = buffer_received_faces_scalar(ipoin2D)
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. &
+ myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocfrom_faces(imsg) .and. imsg_type(imsg) == imsg_loop) then
+ sender = iprocto_faces(imsg)
+ npoin2D_chunks = npoin2D_faces(icount_faces)
+ call MPI_RECV(buffer_received_faces_scalar, &
+ npoin2D_chunks,CUSTOM_MPI_TYPE,sender, &
+ itag,MPI_COMM_WORLD,msg_status,ier)
+ do ipoin2D=1,npoin2D_chunks
+ array_val(iboolfaces(ipoin2D,icount_faces)) = buffer_received_faces_scalar(ipoin2D)
+ enddo
+ endif
enddo
- endif
- enddo
-!---- put slices in send mode
-!---- a given slice can belong to at most two faces
- icount_faces = 0
- do imsg = 1,NUMMSGS_FACES
- if(myrank==iprocfrom_faces(imsg) .or. &
- myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
- if(myrank==iprocto_faces(imsg) .and. imsg_type(imsg) == imsg_loop) then
- receiver = iprocfrom_faces(imsg)
- npoin2D_chunks = npoin2D_faces(icount_faces)
- do ipoin2D=1,npoin2D_chunks
- buffer_send_faces_scalar(ipoin2D) = array_val(iboolfaces(ipoin2D,icount_faces))
+ !---- put slices in send mode
+ !---- a given slice can belong to at most two faces
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. &
+ myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocto_faces(imsg) .and. imsg_type(imsg) == imsg_loop) then
+ receiver = iprocfrom_faces(imsg)
+ npoin2D_chunks = npoin2D_faces(icount_faces)
+ do ipoin2D=1,npoin2D_chunks
+ buffer_send_faces_scalar(ipoin2D) = array_val(iboolfaces(ipoin2D,icount_faces))
+ enddo
+ call MPI_SEND(buffer_send_faces_scalar,npoin2D_chunks, &
+ CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,ier)
+ endif
enddo
- call MPI_SEND(buffer_send_faces_scalar,npoin2D_chunks, &
- CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,ier)
- endif
- enddo
! end of anti-deadlocking loop
enddo
@@ -352,86 +352,86 @@
do imsg = 1,NCORNERSCHUNKS
- if(myrank == iproc_master_corners(imsg) .or. &
- myrank == iproc_worker1_corners(imsg) .or. &
- (NCHUNKS /= 2 .and. myrank == iproc_worker2_corners(imsg))) icount_corners = icount_corners + 1
+ if(myrank == iproc_master_corners(imsg) .or. &
+ myrank == iproc_worker1_corners(imsg) .or. &
+ (NCHUNKS /= 2 .and. myrank == iproc_worker2_corners(imsg))) icount_corners = icount_corners + 1
-!---- receive messages from the two workers on the master
- if(myrank==iproc_master_corners(imsg)) then
+ !---- receive messages from the two workers on the master
+ if(myrank==iproc_master_corners(imsg)) then
-! receive from worker #1 and add to local array
- sender = iproc_worker1_corners(imsg)
- call MPI_RECV(buffer_recv_chunkcorn_scalar,NGLOB1D_RADIAL, &
- CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
- do ipoin1D=1,NGLOB1D_RADIAL
- array_val(iboolcorner(ipoin1D,icount_corners)) = array_val(iboolcorner(ipoin1D,icount_corners)) + &
- buffer_recv_chunkcorn_scalar(ipoin1D)
- enddo
+ ! receive from worker #1 and add to local array
+ sender = iproc_worker1_corners(imsg)
+ call MPI_RECV(buffer_recv_chunkcorn_scalar,NGLOB1D_RADIAL, &
+ CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
+ do ipoin1D=1,NGLOB1D_RADIAL
+ array_val(iboolcorner(ipoin1D,icount_corners)) = array_val(iboolcorner(ipoin1D,icount_corners)) + &
+ buffer_recv_chunkcorn_scalar(ipoin1D)
+ enddo
-! receive from worker #2 and add to local array
- if(NCHUNKS /= 2) then
- sender = iproc_worker2_corners(imsg)
- call MPI_RECV(buffer_recv_chunkcorn_scalar,NGLOB1D_RADIAL, &
- CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
- do ipoin1D=1,NGLOB1D_RADIAL
- array_val(iboolcorner(ipoin1D,icount_corners)) = array_val(iboolcorner(ipoin1D,icount_corners)) + &
- buffer_recv_chunkcorn_scalar(ipoin1D)
- enddo
- endif
+ ! receive from worker #2 and add to local array
+ if(NCHUNKS /= 2) then
+ sender = iproc_worker2_corners(imsg)
+ call MPI_RECV(buffer_recv_chunkcorn_scalar,NGLOB1D_RADIAL, &
+ CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
+ do ipoin1D=1,NGLOB1D_RADIAL
+ array_val(iboolcorner(ipoin1D,icount_corners)) = array_val(iboolcorner(ipoin1D,icount_corners)) + &
+ buffer_recv_chunkcorn_scalar(ipoin1D)
+ enddo
+ endif
- endif
+ endif
-!---- send messages from the two workers to the master
- if(myrank==iproc_worker1_corners(imsg) .or. &
- (NCHUNKS /= 2 .and. myrank==iproc_worker2_corners(imsg))) then
+ !---- send messages from the two workers to the master
+ if(myrank==iproc_worker1_corners(imsg) .or. &
+ (NCHUNKS /= 2 .and. myrank==iproc_worker2_corners(imsg))) then
- receiver = iproc_master_corners(imsg)
- do ipoin1D=1,NGLOB1D_RADIAL
- buffer_send_chunkcorn_scalar(ipoin1D) = array_val(iboolcorner(ipoin1D,icount_corners))
- enddo
- call MPI_SEND(buffer_send_chunkcorn_scalar,NGLOB1D_RADIAL,CUSTOM_MPI_TYPE, &
- receiver,itag,MPI_COMM_WORLD,ier)
+ receiver = iproc_master_corners(imsg)
+ do ipoin1D=1,NGLOB1D_RADIAL
+ buffer_send_chunkcorn_scalar(ipoin1D) = array_val(iboolcorner(ipoin1D,icount_corners))
+ enddo
+ call MPI_SEND(buffer_send_chunkcorn_scalar,NGLOB1D_RADIAL,CUSTOM_MPI_TYPE, &
+ receiver,itag,MPI_COMM_WORLD,ier)
- endif
+ endif
-! *********************************************************************
-! transmit messages back in opposite direction (master -> two workers)
-! *********************************************************************
+ ! *********************************************************************
+ ! transmit messages back in opposite direction (master -> two workers)
+ ! *********************************************************************
-!---- receive messages from the master on the two workers
- if(myrank==iproc_worker1_corners(imsg) .or. &
- (NCHUNKS /= 2 .and. myrank==iproc_worker2_corners(imsg))) then
+ !---- receive messages from the master on the two workers
+ if(myrank==iproc_worker1_corners(imsg) .or. &
+ (NCHUNKS /= 2 .and. myrank==iproc_worker2_corners(imsg))) then
-! receive from master and copy to local array
- sender = iproc_master_corners(imsg)
- call MPI_RECV(buffer_recv_chunkcorn_scalar,NGLOB1D_RADIAL, &
- CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
- do ipoin1D=1,NGLOB1D_RADIAL
- array_val(iboolcorner(ipoin1D,icount_corners)) = buffer_recv_chunkcorn_scalar(ipoin1D)
- enddo
+ ! receive from master and copy to local array
+ sender = iproc_master_corners(imsg)
+ call MPI_RECV(buffer_recv_chunkcorn_scalar,NGLOB1D_RADIAL, &
+ CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
+ do ipoin1D=1,NGLOB1D_RADIAL
+ array_val(iboolcorner(ipoin1D,icount_corners)) = buffer_recv_chunkcorn_scalar(ipoin1D)
+ enddo
- endif
+ endif
-!---- send messages from the master to the two workers
- if(myrank==iproc_master_corners(imsg)) then
+ !---- send messages from the master to the two workers
+ if(myrank==iproc_master_corners(imsg)) then
- do ipoin1D=1,NGLOB1D_RADIAL
- buffer_send_chunkcorn_scalar(ipoin1D) = array_val(iboolcorner(ipoin1D,icount_corners))
- enddo
+ do ipoin1D=1,NGLOB1D_RADIAL
+ buffer_send_chunkcorn_scalar(ipoin1D) = array_val(iboolcorner(ipoin1D,icount_corners))
+ enddo
-! send to worker #1
- receiver = iproc_worker1_corners(imsg)
- call MPI_SEND(buffer_send_chunkcorn_scalar,NGLOB1D_RADIAL,CUSTOM_MPI_TYPE, &
- receiver,itag,MPI_COMM_WORLD,ier)
+ ! send to worker #1
+ receiver = iproc_worker1_corners(imsg)
+ call MPI_SEND(buffer_send_chunkcorn_scalar,NGLOB1D_RADIAL,CUSTOM_MPI_TYPE, &
+ receiver,itag,MPI_COMM_WORLD,ier)
-! send to worker #2
- if(NCHUNKS /= 2) then
- receiver = iproc_worker2_corners(imsg)
- call MPI_SEND(buffer_send_chunkcorn_scalar,NGLOB1D_RADIAL,CUSTOM_MPI_TYPE, &
- receiver,itag,MPI_COMM_WORLD,ier)
- endif
+ ! send to worker #2
+ if(NCHUNKS /= 2) then
+ receiver = iproc_worker2_corners(imsg)
+ call MPI_SEND(buffer_send_chunkcorn_scalar,NGLOB1D_RADIAL,CUSTOM_MPI_TYPE, &
+ receiver,itag,MPI_COMM_WORLD,ier)
+ endif
- endif
+ endif
enddo
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_vector.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_vector.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_vector.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -32,35 +32,40 @@
!----
subroutine assemble_MPI_vector(myrank,accel_crust_mantle,accel_inner_core, &
- iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces_vector,buffer_received_faces_vector,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector, &
- NUMMSGS_FACES,NCORNERSCHUNKS, &
- NPROC_XI,NPROC_ETA,NGLOB1D_RADIAL_crust_mantle, &
- NGLOB1D_RADIAL_inner_core,NCHUNKS,iphase)
+ iproc_xi,iproc_eta,ichunk,addressing, &
+ iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,&
+ iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+ npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+ iboolleft_xi_inner_core,iboolright_xi_inner_core,&
+ iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+ npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+ iboolfaces_inner_core,iboolcorner_inner_core, &
+ iprocfrom_faces,iprocto_faces, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ buffer_send_faces_vector,buffer_received_faces_vector, &
+ npoin2D_max_all_CM_IC, &
+ buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector, &
+ request_send,request_receive, &
+ request_send_array,request_receive_array, &
+ NUMMSGS_FACES,NCORNERSCHUNKS, &
+ NPROC_XI,NPROC_ETA,NGLOB1D_RADIAL_crust_mantle, &
+ NGLOB1D_RADIAL_inner_core,NCHUNKS,iphase_comm)
implicit none
-! standard include of the MPI library
+ ! standard include of the MPI library
include 'mpif.h'
include "constants.h"
include "precision.h"
-! include values created by the mesher
+ ! include values created by the mesher
include "OUTPUT_FILES/values_from_mesher.h"
- integer myrank,NCHUNKS,iphase
+ integer myrank,NCHUNKS,iphase_comm
-! the two arrays to assemble
+ ! the two arrays to assemble
real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE), intent(inout) :: accel_crust_mantle
real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE), intent(inout) :: accel_inner_core
@@ -74,19 +79,18 @@
integer, intent(in) :: NGLOB1D_RADIAL_crust_mantle,NGLOB1D_RADIAL_inner_core,NPROC_XI,NPROC_ETA
integer, intent(in) :: NUMMSGS_FACES,NCORNERSCHUNKS
-! for addressing of the slices
+ ! for addressing of the slices
integer, dimension(NCHUNKS,0:NPROC_XI-1,0:NPROC_ETA-1), intent(in) :: addressing
-! 2-D addressing and buffers for summation between slices
+ ! 2-D addressing and buffers for summation between slices
integer, dimension(NGLOB2DMAX_XMIN_XMAX_CM), intent(in) :: iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle
integer, dimension(NGLOB2DMAX_YMIN_YMAX_CM), intent(in) :: iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle
integer, dimension(NGLOB2DMAX_XMIN_XMAX_IC), intent(in) :: iboolleft_xi_inner_core,iboolright_xi_inner_core
integer, dimension(NGLOB2DMAX_YMIN_YMAX_IC), intent(in) :: iboolleft_eta_inner_core,iboolright_eta_inner_core
-! indirect addressing for each corner of the chunks
+ ! indirect addressing for each corner of the chunks
integer, dimension(NGLOB1D_RADIAL_crust_mantle,NUMCORNERS_SHARED), intent(in) :: iboolcorner_crust_mantle
integer, dimension(NGLOB1D_RADIAL_inner_core,NUMCORNERS_SHARED), intent(in) :: iboolcorner_inner_core
- integer icount_corners
integer, intent(in) :: npoin2D_max_all_CM_IC
integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED), intent(in) :: iboolfaces_crust_mantle
@@ -94,20 +98,27 @@
real(kind=CUSTOM_REAL), dimension(NDIM,npoin2D_max_all_CM_IC,NUMFACES_SHARED), intent(inout) :: &
buffer_send_faces_vector,buffer_received_faces_vector
-! buffers for send and receive between corners of the chunks
-! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
+ ! buffers for send and receive between corners of the chunks
+ ! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB1D_RADIAL_crust_mantle + NGLOB1D_RADIAL_inner_core), intent(inout) :: &
buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector
-! ---- arrays to assemble between chunks
+ ! stored as global parameters
+ integer, intent(inout) :: request_send,request_receive
+ integer, dimension(NUMFACES_SHARED), intent(inout) :: request_send_array,request_receive_array
-! communication pattern for faces between chunks
+
+ ! ---- arrays to assemble between chunks
+ ! communication pattern for faces between chunks
integer, dimension(NUMMSGS_FACES), intent(in) :: iprocfrom_faces,iprocto_faces
-
-! communication pattern for corners between chunks
+ ! communication pattern for corners between chunks
integer, dimension(NCORNERSCHUNKS), intent(in) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
-! MPI status of messages to be received
+! local parameters
+
+ integer :: icount_corners
+
+ ! MPI status of messages to be received
integer, dimension(MPI_STATUS_SIZE) :: msg_status
integer :: ipoin,ipoin2D,ipoin1D
@@ -118,811 +129,1061 @@
integer :: NGLOB1D_RADIAL_all
integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_all,npoin2D_eta_all
-! do not remove the "save" statement because this routine is non blocking
-! therefore it needs to find the right value of ioffset when it re-enters
-! the routine later to perform the next communication step
+ logical :: flag_result_test
+ integer :: ier
+
+ ! daniel: TODO - comment below might be obsolete..
+ ! do not remove the "save" statement because this routine is non blocking
+ ! therefore it needs to find the right value of ioffset when it re-enters
+ ! the routine later to perform the next communication step
integer, save :: ioffset
- integer :: ier
-! do not remove the "save" statement because this routine is non blocking
- integer, save :: request_send,request_receive
- integer, dimension(NUMFACES_SHARED), save :: request_send_array,request_receive_array
- logical :: flag_result_test
-! $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
-
-! check flag to see if we need to assemble (might be turned off when debugging)
+ ! check flag to see if we need to assemble (might be turned off when debugging)
if (.not. ACTUALLY_ASSEMBLE_MPI_SLICES) then
- iphase = 9999 ! this means everything is finished
+ iphase_comm = 9999 ! this means everything is finished
return
endif
-! here we have to assemble all the contributions between slices using MPI
+ ! here we have to assemble all the contributions between slices using MPI
-! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
+ ! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
npoin2D_xi_all(:) = npoin2D_xi_crust_mantle(:) + npoin2D_xi_inner_core(:)
npoin2D_eta_all(:) = npoin2D_eta_crust_mantle(:) + npoin2D_eta_inner_core(:)
-!----
-!---- assemble the contributions between slices using MPI
-!----
+ !----
+ !---- assemble the contributions between slices using MPI
+ !----
-!----
-!---- first assemble along xi using the 2-D topology
-!----
+ !----
+ !---- first assemble along xi using the 2-D topology
+ !----
+ select case( iphase_comm )
- if(iphase == 1) then
+ case( 1 )
-! slices copy the right face into the buffer
- do ipoin = 1,npoin2D_xi_crust_mantle(2)
- buffer_send_faces_vector(1,ipoin,1) = accel_crust_mantle(1,iboolright_xi_crust_mantle(ipoin))
- buffer_send_faces_vector(2,ipoin,1) = accel_crust_mantle(2,iboolright_xi_crust_mantle(ipoin))
- buffer_send_faces_vector(3,ipoin,1) = accel_crust_mantle(3,iboolright_xi_crust_mantle(ipoin))
- enddo
+ ! slices send out values along xi sides (right face) forward along each row
-! the buffer for the inner core starts right after the buffer for the crust and mantle
- ioffset = npoin2D_xi_crust_mantle(2)
+ ! slices copy the right face into the buffer
+ do ipoin = 1,npoin2D_xi_crust_mantle(2)
+ buffer_send_faces_vector(:,ipoin,1) = accel_crust_mantle(:,iboolright_xi_crust_mantle(ipoin))
+ enddo
- do ipoin = 1,npoin2D_xi_inner_core(2)
- buffer_send_faces_vector(1,ioffset + ipoin,1) = accel_inner_core(1,iboolright_xi_inner_core(ipoin))
- buffer_send_faces_vector(2,ioffset + ipoin,1) = accel_inner_core(2,iboolright_xi_inner_core(ipoin))
- buffer_send_faces_vector(3,ioffset + ipoin,1) = accel_inner_core(3,iboolright_xi_inner_core(ipoin))
- enddo
+ ! the buffer for the inner core starts right after the buffer for the crust and mantle
+ ioffset = npoin2D_xi_crust_mantle(2)
-! send messages forward along each row
- if(iproc_xi == 0) then
- sender = MPI_PROC_NULL
- else
- sender = addressing(ichunk,iproc_xi - 1,iproc_eta)
- endif
- if(iproc_xi == NPROC_XI-1) then
- receiver = MPI_PROC_NULL
- else
- receiver = addressing(ichunk,iproc_xi + 1,iproc_eta)
- endif
- call MPI_IRECV(buffer_received_faces_vector,NDIM*npoin2D_xi_all(1),CUSTOM_MPI_TYPE,sender, &
- itag,MPI_COMM_WORLD,request_receive,ier)
+ do ipoin = 1,npoin2D_xi_inner_core(2)
+ buffer_send_faces_vector(:,ioffset + ipoin,1) = accel_inner_core(:,iboolright_xi_inner_core(ipoin))
+ enddo
- call MPI_ISEND(buffer_send_faces_vector,NDIM*npoin2D_xi_all(2),CUSTOM_MPI_TYPE,receiver, &
- itag2,MPI_COMM_WORLD,request_send,ier)
+ ! send messages forward along each row
+ if(iproc_xi == 0) then
+ sender = MPI_PROC_NULL
+ else
+ sender = addressing(ichunk,iproc_xi - 1,iproc_eta)
+ endif
+ if(iproc_xi == NPROC_XI-1) then
+ receiver = MPI_PROC_NULL
+ else
+ receiver = addressing(ichunk,iproc_xi + 1,iproc_eta)
+ endif
+ ! requests to receive message
+ call MPI_IRECV(buffer_received_faces_vector,NDIM*npoin2D_xi_all(1),CUSTOM_MPI_TYPE,sender, &
+ itag,MPI_COMM_WORLD,request_receive,ier)
- iphase = iphase + 1
- return ! exit because we have started some communications therefore we need some time
+ ! sends out buffer
+ call MPI_ISEND(buffer_send_faces_vector,NDIM*npoin2D_xi_all(2),CUSTOM_MPI_TYPE,receiver, &
+ itag2,MPI_COMM_WORLD,request_send,ier)
- endif !!!!!!!!! end of iphase 1
+ iphase_comm = iphase_comm + 1
+ return ! exit because we have started some communications therefore we need some time
- if(iphase == 2) then
-! call MPI_WAIT(request_send,msg_status,ier)
-! call MPI_WAIT(request_receive,msg_status,ier)
- call MPI_TEST(request_send,flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not sent yet
- call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not received yet
+ case( 2 )
-! all slices add the buffer received to the contributions on the left face
- if(iproc_xi > 0) then
+ ! slices assemble along (left face) xi sides
+ ! and send out values along xi sides (left face) backward along each row
- do ipoin = 1,npoin2D_xi_crust_mantle(1)
- accel_crust_mantle(1,iboolleft_xi_crust_mantle(ipoin)) = accel_crust_mantle(1,iboolleft_xi_crust_mantle(ipoin)) + &
- buffer_received_faces_vector(1,ipoin,1)
- accel_crust_mantle(2,iboolleft_xi_crust_mantle(ipoin)) = accel_crust_mantle(2,iboolleft_xi_crust_mantle(ipoin)) + &
- buffer_received_faces_vector(2,ipoin,1)
- accel_crust_mantle(3,iboolleft_xi_crust_mantle(ipoin)) = accel_crust_mantle(3,iboolleft_xi_crust_mantle(ipoin)) + &
- buffer_received_faces_vector(3,ipoin,1)
- enddo
+ ! checks if messages have been sent out and requested ones received
+ ! call MPI_WAIT(request_send,msg_status,ier)
+ ! call MPI_WAIT(request_receive,msg_status,ier)
+ call MPI_TEST(request_send,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not sent yet
+ call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not received yet
-! the buffer for the inner core starts right after the buffer for the crust and mantle
- ioffset = npoin2D_xi_crust_mantle(1)
+ ! all slices add the buffer received to the contributions on the left face
+ if(iproc_xi > 0) then
- do ipoin = 1,npoin2D_xi_inner_core(1)
- accel_inner_core(1,iboolleft_xi_inner_core(ipoin)) = accel_inner_core(1,iboolleft_xi_inner_core(ipoin)) + &
- buffer_received_faces_vector(1,ioffset + ipoin,1)
- accel_inner_core(2,iboolleft_xi_inner_core(ipoin)) = accel_inner_core(2,iboolleft_xi_inner_core(ipoin)) + &
- buffer_received_faces_vector(2,ioffset + ipoin,1)
- accel_inner_core(3,iboolleft_xi_inner_core(ipoin)) = accel_inner_core(3,iboolleft_xi_inner_core(ipoin)) + &
- buffer_received_faces_vector(3,ioffset + ipoin,1)
- enddo
+ do ipoin = 1,npoin2D_xi_crust_mantle(1)
+ accel_crust_mantle(:,iboolleft_xi_crust_mantle(ipoin)) = &
+ accel_crust_mantle(:,iboolleft_xi_crust_mantle(ipoin)) + &
+ buffer_received_faces_vector(:,ipoin,1)
+ enddo
- endif
+ ! the buffer for the inner core starts right after the buffer for the crust and mantle
+ ioffset = npoin2D_xi_crust_mantle(1)
-! the contributions are correctly assembled on the left side of each slice
-! now we have to send the result back to the sender
-! all slices copy the left face into the buffer
- do ipoin = 1,npoin2D_xi_crust_mantle(1)
- buffer_send_faces_vector(1,ipoin,1) = accel_crust_mantle(1,iboolleft_xi_crust_mantle(ipoin))
- buffer_send_faces_vector(2,ipoin,1) = accel_crust_mantle(2,iboolleft_xi_crust_mantle(ipoin))
- buffer_send_faces_vector(3,ipoin,1) = accel_crust_mantle(3,iboolleft_xi_crust_mantle(ipoin))
- enddo
+ do ipoin = 1,npoin2D_xi_inner_core(1)
+ accel_inner_core(:,iboolleft_xi_inner_core(ipoin)) = &
+ accel_inner_core(:,iboolleft_xi_inner_core(ipoin)) + &
+ buffer_received_faces_vector(:,ioffset + ipoin,1)
+ enddo
-! the buffer for the inner core starts right after the buffer for the crust and mantle
- ioffset = npoin2D_xi_crust_mantle(1)
+ endif
- do ipoin = 1,npoin2D_xi_inner_core(1)
- buffer_send_faces_vector(1,ioffset + ipoin,1) = accel_inner_core(1,iboolleft_xi_inner_core(ipoin))
- buffer_send_faces_vector(2,ioffset + ipoin,1) = accel_inner_core(2,iboolleft_xi_inner_core(ipoin))
- buffer_send_faces_vector(3,ioffset + ipoin,1) = accel_inner_core(3,iboolleft_xi_inner_core(ipoin))
- enddo
+ ! the contributions are correctly assembled on the left side of each slice
+ ! now we have to send the result back to the sender
+ ! all slices copy the left face into the buffer
+ do ipoin = 1,npoin2D_xi_crust_mantle(1)
+ buffer_send_faces_vector(:,ipoin,1) = accel_crust_mantle(:,iboolleft_xi_crust_mantle(ipoin))
+ enddo
-! send messages backward along each row
- if(iproc_xi == NPROC_XI-1) then
- sender = MPI_PROC_NULL
- else
- sender = addressing(ichunk,iproc_xi + 1,iproc_eta)
- endif
- if(iproc_xi == 0) then
- receiver = MPI_PROC_NULL
- else
- receiver = addressing(ichunk,iproc_xi - 1,iproc_eta)
- endif
- call MPI_IRECV(buffer_received_faces_vector,NDIM*npoin2D_xi_all(2),CUSTOM_MPI_TYPE,sender, &
- itag,MPI_COMM_WORLD,request_receive,ier)
+ ! the buffer for the inner core starts right after the buffer for the crust and mantle
+ ioffset = npoin2D_xi_crust_mantle(1)
- call MPI_ISEND(buffer_send_faces_vector,NDIM*npoin2D_xi_all(1),CUSTOM_MPI_TYPE,receiver, &
- itag2,MPI_COMM_WORLD,request_send,ier)
+ do ipoin = 1,npoin2D_xi_inner_core(1)
+ buffer_send_faces_vector(:,ioffset + ipoin,1) = accel_inner_core(:,iboolleft_xi_inner_core(ipoin))
+ enddo
- iphase = iphase + 1
- return ! exit because we have started some communications therefore we need some time
+ ! send messages backward along each row
+ if(iproc_xi == NPROC_XI-1) then
+ sender = MPI_PROC_NULL
+ else
+ sender = addressing(ichunk,iproc_xi + 1,iproc_eta)
+ endif
+ if(iproc_xi == 0) then
+ receiver = MPI_PROC_NULL
+ else
+ receiver = addressing(ichunk,iproc_xi - 1,iproc_eta)
+ endif
+ call MPI_IRECV(buffer_received_faces_vector,NDIM*npoin2D_xi_all(2),CUSTOM_MPI_TYPE,sender, &
+ itag,MPI_COMM_WORLD,request_receive,ier)
- endif !!!!!!!!! end of iphase 2
+ call MPI_ISEND(buffer_send_faces_vector,NDIM*npoin2D_xi_all(1),CUSTOM_MPI_TYPE,receiver, &
+ itag2,MPI_COMM_WORLD,request_send,ier)
- if(iphase == 3) then
+ iphase_comm = iphase_comm + 1
+ return ! exit because we have started some communications therefore we need some time
-! call MPI_WAIT(request_send,msg_status,ier)
-! call MPI_WAIT(request_receive,msg_status,ier)
- call MPI_TEST(request_send,flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not sent yet
- call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not received yet
+ case( 3 )
-! all slices copy the buffer received to the contributions on the right face
- if(iproc_xi < NPROC_XI-1) then
+ ! slices set (right face) xi sides
+ ! and sent out values along eta sides (right face) forward along each column
- do ipoin = 1,npoin2D_xi_crust_mantle(2)
- accel_crust_mantle(1,iboolright_xi_crust_mantle(ipoin)) = buffer_received_faces_vector(1,ipoin,1)
- accel_crust_mantle(2,iboolright_xi_crust_mantle(ipoin)) = buffer_received_faces_vector(2,ipoin,1)
- accel_crust_mantle(3,iboolright_xi_crust_mantle(ipoin)) = buffer_received_faces_vector(3,ipoin,1)
- enddo
+ ! checks if messages have been sent out and received
+ ! call MPI_WAIT(request_send,msg_status,ier)
+ ! call MPI_WAIT(request_receive,msg_status,ier)
+ call MPI_TEST(request_send,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not sent yet
+ call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not received yet
-! the buffer for the inner core starts right after the buffer for the crust and mantle
- ioffset = npoin2D_xi_crust_mantle(2)
+ ! all slices copy the buffer received to the contributions on the right face
+ if(iproc_xi < NPROC_XI-1) then
- do ipoin = 1,npoin2D_xi_inner_core(2)
- accel_inner_core(1,iboolright_xi_inner_core(ipoin)) = buffer_received_faces_vector(1,ioffset + ipoin,1)
- accel_inner_core(2,iboolright_xi_inner_core(ipoin)) = buffer_received_faces_vector(2,ioffset + ipoin,1)
- accel_inner_core(3,iboolright_xi_inner_core(ipoin)) = buffer_received_faces_vector(3,ioffset + ipoin,1)
- enddo
+ do ipoin = 1,npoin2D_xi_crust_mantle(2)
+ accel_crust_mantle(:,iboolright_xi_crust_mantle(ipoin)) = buffer_received_faces_vector(:,ipoin,1)
+ enddo
- endif
+ ! the buffer for the inner core starts right after the buffer for the crust and mantle
+ ioffset = npoin2D_xi_crust_mantle(2)
-!----
-!---- then assemble along eta using the 2-D topology
-!----
+ do ipoin = 1,npoin2D_xi_inner_core(2)
+ accel_inner_core(:,iboolright_xi_inner_core(ipoin)) = buffer_received_faces_vector(:,ioffset + ipoin,1)
+ enddo
-! slices copy the right face into the buffer
- do ipoin = 1,npoin2D_eta_crust_mantle(2)
- buffer_send_faces_vector(1,ipoin,1) = accel_crust_mantle(1,iboolright_eta_crust_mantle(ipoin))
- buffer_send_faces_vector(2,ipoin,1) = accel_crust_mantle(2,iboolright_eta_crust_mantle(ipoin))
- buffer_send_faces_vector(3,ipoin,1) = accel_crust_mantle(3,iboolright_eta_crust_mantle(ipoin))
- enddo
+ endif
-! the buffer for the inner core starts right after the buffer for the crust and mantle
- ioffset = npoin2D_eta_crust_mantle(2)
+ !----
+ !---- then assemble along eta using the 2-D topology
+ !----
- do ipoin = 1,npoin2D_eta_inner_core(2)
- buffer_send_faces_vector(1,ioffset + ipoin,1) = accel_inner_core(1,iboolright_eta_inner_core(ipoin))
- buffer_send_faces_vector(2,ioffset + ipoin,1) = accel_inner_core(2,iboolright_eta_inner_core(ipoin))
- buffer_send_faces_vector(3,ioffset + ipoin,1) = accel_inner_core(3,iboolright_eta_inner_core(ipoin))
- enddo
+ ! slices copy the right face into the buffer
+ do ipoin = 1,npoin2D_eta_crust_mantle(2)
+ buffer_send_faces_vector(:,ipoin,1) = accel_crust_mantle(:,iboolright_eta_crust_mantle(ipoin))
+ enddo
-! send messages forward along each row
- if(iproc_eta == 0) then
- sender = MPI_PROC_NULL
- else
- sender = addressing(ichunk,iproc_xi,iproc_eta - 1)
- endif
- if(iproc_eta == NPROC_ETA-1) then
- receiver = MPI_PROC_NULL
- else
- receiver = addressing(ichunk,iproc_xi,iproc_eta + 1)
- endif
- call MPI_IRECV(buffer_received_faces_vector,NDIM*npoin2D_eta_all(1),CUSTOM_MPI_TYPE,sender, &
- itag,MPI_COMM_WORLD,request_receive,ier)
+ ! the buffer for the inner core starts right after the buffer for the crust and mantle
+ ioffset = npoin2D_eta_crust_mantle(2)
- call MPI_ISEND(buffer_send_faces_vector,NDIM*npoin2D_eta_all(2),CUSTOM_MPI_TYPE,receiver, &
- itag2,MPI_COMM_WORLD,request_send,ier)
+ do ipoin = 1,npoin2D_eta_inner_core(2)
+ buffer_send_faces_vector(:,ioffset + ipoin,1) = accel_inner_core(:,iboolright_eta_inner_core(ipoin))
+ enddo
- iphase = iphase + 1
- return ! exit because we have started some communications therefore we need some time
+ ! send messages forward along each row
+ if(iproc_eta == 0) then
+ sender = MPI_PROC_NULL
+ else
+ sender = addressing(ichunk,iproc_xi,iproc_eta - 1)
+ endif
+ if(iproc_eta == NPROC_ETA-1) then
+ receiver = MPI_PROC_NULL
+ else
+ receiver = addressing(ichunk,iproc_xi,iproc_eta + 1)
+ endif
+ call MPI_IRECV(buffer_received_faces_vector,NDIM*npoin2D_eta_all(1),CUSTOM_MPI_TYPE,sender, &
+ itag,MPI_COMM_WORLD,request_receive,ier)
- endif !!!!!!!!! end of iphase 3
+ call MPI_ISEND(buffer_send_faces_vector,NDIM*npoin2D_eta_all(2),CUSTOM_MPI_TYPE,receiver, &
+ itag2,MPI_COMM_WORLD,request_send,ier)
- if(iphase == 4) then
+ iphase_comm = iphase_comm + 1
+ return ! exit because we have started some communications therefore we need some time
-! call MPI_WAIT(request_send,msg_status,ier)
-! call MPI_WAIT(request_receive,msg_status,ier)
- call MPI_TEST(request_send,flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not sent yet
- call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not received yet
-! all slices add the buffer received to the contributions on the left face
- if(iproc_eta > 0) then
+ case( 4 )
- do ipoin = 1,npoin2D_eta_crust_mantle(1)
- accel_crust_mantle(1,iboolleft_eta_crust_mantle(ipoin)) = accel_crust_mantle(1,iboolleft_eta_crust_mantle(ipoin)) + &
- buffer_received_faces_vector(1,ipoin,1)
- accel_crust_mantle(2,iboolleft_eta_crust_mantle(ipoin)) = accel_crust_mantle(2,iboolleft_eta_crust_mantle(ipoin)) + &
- buffer_received_faces_vector(2,ipoin,1)
- accel_crust_mantle(3,iboolleft_eta_crust_mantle(ipoin)) = accel_crust_mantle(3,iboolleft_eta_crust_mantle(ipoin)) + &
- buffer_received_faces_vector(3,ipoin,1)
- enddo
+ ! slices assemble along (left face) eta sides
+ ! and sent out values along eta sides (left face) backward along each column
-! the buffer for the inner core starts right after the buffer for the crust and mantle
- ioffset = npoin2D_eta_crust_mantle(1)
+ ! checks if messages have been sent out and received
+ ! call MPI_WAIT(request_send,msg_status,ier)
+ ! call MPI_WAIT(request_receive,msg_status,ier)
+ call MPI_TEST(request_send,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not sent yet
+ call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not received yet
- do ipoin = 1,npoin2D_eta_inner_core(1)
- accel_inner_core(1,iboolleft_eta_inner_core(ipoin)) = accel_inner_core(1,iboolleft_eta_inner_core(ipoin)) + &
- buffer_received_faces_vector(1,ioffset + ipoin,1)
- accel_inner_core(2,iboolleft_eta_inner_core(ipoin)) = accel_inner_core(2,iboolleft_eta_inner_core(ipoin)) + &
- buffer_received_faces_vector(2,ioffset + ipoin,1)
- accel_inner_core(3,iboolleft_eta_inner_core(ipoin)) = accel_inner_core(3,iboolleft_eta_inner_core(ipoin)) + &
- buffer_received_faces_vector(3,ioffset + ipoin,1)
- enddo
+ ! all slices add the buffer received to the contributions on the left face
+ if(iproc_eta > 0) then
- endif
+ do ipoin = 1,npoin2D_eta_crust_mantle(1)
+ accel_crust_mantle(:,iboolleft_eta_crust_mantle(ipoin)) = &
+ accel_crust_mantle(:,iboolleft_eta_crust_mantle(ipoin)) + &
+ buffer_received_faces_vector(:,ipoin,1)
+ enddo
-! the contributions are correctly assembled on the left side of each slice
-! now we have to send the result back to the sender
-! all slices copy the left face into the buffer
- do ipoin = 1,npoin2D_eta_crust_mantle(1)
- buffer_send_faces_vector(1,ipoin,1) = accel_crust_mantle(1,iboolleft_eta_crust_mantle(ipoin))
- buffer_send_faces_vector(2,ipoin,1) = accel_crust_mantle(2,iboolleft_eta_crust_mantle(ipoin))
- buffer_send_faces_vector(3,ipoin,1) = accel_crust_mantle(3,iboolleft_eta_crust_mantle(ipoin))
- enddo
+ ! the buffer for the inner core starts right after the buffer for the crust and mantle
+ ioffset = npoin2D_eta_crust_mantle(1)
-! the buffer for the inner core starts right after the buffer for the crust and mantle
- ioffset = npoin2D_eta_crust_mantle(1)
+ do ipoin = 1,npoin2D_eta_inner_core(1)
+ accel_inner_core(:,iboolleft_eta_inner_core(ipoin)) = &
+ accel_inner_core(:,iboolleft_eta_inner_core(ipoin)) + &
+ buffer_received_faces_vector(:,ioffset + ipoin,1)
+ enddo
- do ipoin = 1,npoin2D_eta_inner_core(1)
- buffer_send_faces_vector(1,ioffset + ipoin,1) = accel_inner_core(1,iboolleft_eta_inner_core(ipoin))
- buffer_send_faces_vector(2,ioffset + ipoin,1) = accel_inner_core(2,iboolleft_eta_inner_core(ipoin))
- buffer_send_faces_vector(3,ioffset + ipoin,1) = accel_inner_core(3,iboolleft_eta_inner_core(ipoin))
- enddo
+ endif
-! send messages backward along each row
- if(iproc_eta == NPROC_ETA-1) then
- sender = MPI_PROC_NULL
- else
- sender = addressing(ichunk,iproc_xi,iproc_eta + 1)
- endif
- if(iproc_eta == 0) then
- receiver = MPI_PROC_NULL
- else
- receiver = addressing(ichunk,iproc_xi,iproc_eta - 1)
- endif
- call MPI_IRECV(buffer_received_faces_vector,NDIM*npoin2D_eta_all(2),CUSTOM_MPI_TYPE,sender, &
- itag,MPI_COMM_WORLD,request_receive,ier)
+ ! the contributions are correctly assembled on the left side of each slice
+ ! now we have to send the result back to the sender
+ ! all slices copy the left face into the buffer
+ do ipoin = 1,npoin2D_eta_crust_mantle(1)
+ buffer_send_faces_vector(:,ipoin,1) = accel_crust_mantle(:,iboolleft_eta_crust_mantle(ipoin))
+ enddo
- call MPI_ISEND(buffer_send_faces_vector,NDIM*npoin2D_eta_all(1),CUSTOM_MPI_TYPE,receiver, &
- itag2,MPI_COMM_WORLD,request_send,ier)
+ ! the buffer for the inner core starts right after the buffer for the crust and mantle
+ ioffset = npoin2D_eta_crust_mantle(1)
- iphase = iphase + 1
- return ! exit because we have started some communications therefore we need some time
+ do ipoin = 1,npoin2D_eta_inner_core(1)
+ buffer_send_faces_vector(:,ioffset + ipoin,1) = accel_inner_core(:,iboolleft_eta_inner_core(ipoin))
+ enddo
- endif !!!!!!!!! end of iphase 4
+ ! send messages backward along each row
+ if(iproc_eta == NPROC_ETA-1) then
+ sender = MPI_PROC_NULL
+ else
+ sender = addressing(ichunk,iproc_xi,iproc_eta + 1)
+ endif
+ if(iproc_eta == 0) then
+ receiver = MPI_PROC_NULL
+ else
+ receiver = addressing(ichunk,iproc_xi,iproc_eta - 1)
+ endif
+ call MPI_IRECV(buffer_received_faces_vector,NDIM*npoin2D_eta_all(2),CUSTOM_MPI_TYPE,sender, &
+ itag,MPI_COMM_WORLD,request_receive,ier)
- if(iphase == 5) then
+ call MPI_ISEND(buffer_send_faces_vector,NDIM*npoin2D_eta_all(1),CUSTOM_MPI_TYPE,receiver, &
+ itag2,MPI_COMM_WORLD,request_send,ier)
-! call MPI_WAIT(request_send,msg_status,ier)
-! call MPI_WAIT(request_receive,msg_status,ier)
- call MPI_TEST(request_send,flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not sent yet
- call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not received yet
+ iphase_comm = iphase_comm + 1
+ return ! exit because we have started some communications therefore we need some time
-! all slices copy the buffer received to the contributions on the right face
- if(iproc_eta < NPROC_ETA-1) then
- do ipoin = 1,npoin2D_eta_crust_mantle(2)
- accel_crust_mantle(1,iboolright_eta_crust_mantle(ipoin)) = buffer_received_faces_vector(1,ipoin,1)
- accel_crust_mantle(2,iboolright_eta_crust_mantle(ipoin)) = buffer_received_faces_vector(2,ipoin,1)
- accel_crust_mantle(3,iboolright_eta_crust_mantle(ipoin)) = buffer_received_faces_vector(3,ipoin,1)
- enddo
+ case( 5 )
-! the buffer for the inner core starts right after the buffer for the crust and mantle
- ioffset = npoin2D_eta_crust_mantle(2)
+ ! slices set (right face) eta sides
+ ! and sent out values for neighbor chunks (iboolfaces)
- do ipoin = 1,npoin2D_eta_inner_core(2)
- accel_inner_core(1,iboolright_eta_inner_core(ipoin)) = buffer_received_faces_vector(1,ioffset + ipoin,1)
- accel_inner_core(2,iboolright_eta_inner_core(ipoin)) = buffer_received_faces_vector(2,ioffset + ipoin,1)
- accel_inner_core(3,iboolright_eta_inner_core(ipoin)) = buffer_received_faces_vector(3,ioffset + ipoin,1)
- enddo
+ ! checks if messages have been sent out and received
+ ! call MPI_WAIT(request_send,msg_status,ier)
+ ! call MPI_WAIT(request_receive,msg_status,ier)
+ call MPI_TEST(request_send,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not sent yet
+ call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not received yet
- endif
+ ! all slices copy the buffer received to the contributions on the right face
+ if(iproc_eta < NPROC_ETA-1) then
-!----
-!---- start MPI assembling phase between chunks
-!----
+ do ipoin = 1,npoin2D_eta_crust_mantle(2)
+ accel_crust_mantle(:,iboolright_eta_crust_mantle(ipoin)) = buffer_received_faces_vector(:,ipoin,1)
+ enddo
-! check flag to see if we need to assemble (might be turned off when debugging)
-! and do not assemble if only one chunk
- if (.not. ACTUALLY_ASSEMBLE_MPI_CHUNKS .or. NCHUNKS == 1) then
- iphase = 9999 ! this means everything is finished
- return
- endif
+ ! the buffer for the inner core starts right after the buffer for the crust and mantle
+ ioffset = npoin2D_eta_crust_mantle(2)
-! ***************************************************************
-! transmit messages in forward direction (iprocfrom -> iprocto)
-! ***************************************************************
+ do ipoin = 1,npoin2D_eta_inner_core(2)
+ accel_inner_core(:,iboolright_eta_inner_core(ipoin)) = buffer_received_faces_vector(:,ioffset + ipoin,1)
+ enddo
-!---- put slices in receive mode
-!---- a given slice can belong to at most two faces
+ endif
- icount_faces = 0
- do imsg = 1,NUMMSGS_FACES
- if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ !----
+ !---- start MPI assembling phase between chunks
+ !----
- if(myrank==iprocto_faces(imsg)) then
- sender = iprocfrom_faces(imsg)
+ ! check flag to see if we need to assemble (might be turned off when debugging)
+ ! and do not assemble if only one chunk
+ if (.not. ACTUALLY_ASSEMBLE_MPI_CHUNKS .or. NCHUNKS == 1) then
+ iphase_comm = 9999 ! this means everything is finished
+ return
+ endif
-! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
- npoin2D_chunks_all = npoin2D_faces_crust_mantle(icount_faces) + npoin2D_faces_inner_core(icount_faces)
+ ! ***************************************************************
+ ! transmit messages in forward direction (iprocfrom -> iprocto)
+ ! ***************************************************************
- call MPI_IRECV(buffer_received_faces_vector(1,1,icount_faces),NDIM*npoin2D_chunks_all,CUSTOM_MPI_TYPE,sender, &
- itag,MPI_COMM_WORLD,request_receive_array(icount_faces),ier)
+ !---- put slices in receive mode
+ !---- a given slice can belong to at most two faces
-! do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
-! accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
-! accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces)) + buffer_received_faces_vector(1,ipoin2D,icount_faces)
-! accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
-! accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces)) + buffer_received_faces_vector(2,ipoin2D,icount_faces)
-! accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
-! accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces)) + buffer_received_faces_vector(3,ipoin2D,icount_faces)
-! enddo
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
-! the buffer for the inner core starts right after the buffer for the crust and mantle
-! ioffset = npoin2D_faces_crust_mantle(icount_faces)
+ if(myrank==iprocto_faces(imsg)) then
+ sender = iprocfrom_faces(imsg)
-! do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
-! accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
-! accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces)) + &
-! buffer_received_faces_vector(1,ioffset + ipoin2D,icount_faces)
-! accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
-! accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces)) + &
-! buffer_received_faces_vector(2,ioffset + ipoin2D,icount_faces)
-! accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
-! accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces)) + &
-! buffer_received_faces_vector(3,ioffset + ipoin2D,icount_faces)
-! enddo
+ ! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
+ npoin2D_chunks_all = npoin2D_faces_crust_mantle(icount_faces) + npoin2D_faces_inner_core(icount_faces)
- endif
- enddo
+ call MPI_IRECV(buffer_received_faces_vector(1,1,icount_faces),NDIM*npoin2D_chunks_all,CUSTOM_MPI_TYPE,sender, &
+ itag,MPI_COMM_WORLD,request_receive_array(icount_faces),ier)
-!---- put slices in send mode
-!---- a given slice can belong to at most two faces
- icount_faces = 0
- do imsg = 1,NUMMSGS_FACES
- if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ ! do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
+ ! accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
+ ! accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces)) &
+ ! + buffer_received_faces_vector(1,ipoin2D,icount_faces)
+ ! accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
+ ! accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces)) &
+ ! + buffer_received_faces_vector(2,ipoin2D,icount_faces)
+ ! accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
+ ! accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces)) &
+ ! + buffer_received_faces_vector(3,ipoin2D,icount_faces)
+ ! enddo
- if(myrank==iprocfrom_faces(imsg)) then
- receiver = iprocto_faces(imsg)
+ ! the buffer for the inner core starts right after the buffer for the crust and mantle
+ ! ioffset = npoin2D_faces_crust_mantle(icount_faces)
-! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
- npoin2D_chunks_all = npoin2D_faces_crust_mantle(icount_faces) + npoin2D_faces_inner_core(icount_faces)
+ ! do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
+ ! accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
+ ! accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces)) + &
+ ! buffer_received_faces_vector(1,ioffset + ipoin2D,icount_faces)
+ ! accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
+ ! accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces)) + &
+ ! buffer_received_faces_vector(2,ioffset + ipoin2D,icount_faces)
+ ! accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
+ ! accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces)) + &
+ ! buffer_received_faces_vector(3,ioffset + ipoin2D,icount_faces)
+ ! enddo
- do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
- buffer_send_faces_vector(1,ipoin2D,icount_faces) = &
- accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces))
- buffer_send_faces_vector(2,ipoin2D,icount_faces) = &
- accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces))
- buffer_send_faces_vector(3,ipoin2D,icount_faces) = &
- accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces))
+ endif
enddo
-! the buffer for the inner core starts right after the buffer for the crust and mantle
- ioffset = npoin2D_faces_crust_mantle(icount_faces)
+ !---- put slices in send mode
+ !---- a given slice can belong to at most two faces
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
- do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
- buffer_send_faces_vector(1,ioffset + ipoin2D,icount_faces) = &
- accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces))
- buffer_send_faces_vector(2,ioffset + ipoin2D,icount_faces) = &
- accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces))
- buffer_send_faces_vector(3,ioffset + ipoin2D,icount_faces) = &
- accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces))
- enddo
+ if(myrank==iprocfrom_faces(imsg)) then
+ receiver = iprocto_faces(imsg)
- call MPI_ISEND(buffer_send_faces_vector(1,1,icount_faces),NDIM*npoin2D_chunks_all,CUSTOM_MPI_TYPE,receiver,itag, &
- MPI_COMM_WORLD,request_send_array(icount_faces),ier)
- endif
- enddo
+ ! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
+ npoin2D_chunks_all = npoin2D_faces_crust_mantle(icount_faces) + npoin2D_faces_inner_core(icount_faces)
- iphase = iphase + 1
- return ! exit because we have started some communications therefore we need some time
+ do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
+ buffer_send_faces_vector(:,ipoin2D,icount_faces) = &
+ accel_crust_mantle(:,iboolfaces_crust_mantle(ipoin2D,icount_faces))
+ enddo
- endif !!!!!!!!! end of iphase 5
+ ! the buffer for the inner core starts right after the buffer for the crust and mantle
+ ioffset = npoin2D_faces_crust_mantle(icount_faces)
- if(iphase == 6) then
+ do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
+ buffer_send_faces_vector(:,ioffset + ipoin2D,icount_faces) = &
+ accel_inner_core(:,iboolfaces_inner_core(ipoin2D,icount_faces))
+ enddo
- icount_faces = 0
- do imsg = 1,NUMMSGS_FACES
- if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
- if(myrank==iprocto_faces(imsg)) then
- call MPI_TEST(request_receive_array(icount_faces),flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not received yet
- endif
- enddo
+ call MPI_ISEND(buffer_send_faces_vector(1,1,icount_faces),NDIM*npoin2D_chunks_all,CUSTOM_MPI_TYPE,receiver,itag, &
+ MPI_COMM_WORLD,request_send_array(icount_faces),ier)
+ endif
+ enddo
- icount_faces = 0
- do imsg = 1,NUMMSGS_FACES
- if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
- if(myrank==iprocfrom_faces(imsg)) then
- call MPI_TEST(request_send_array(icount_faces),flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not sent yet
- endif
- enddo
+ iphase_comm = iphase_comm + 1
+ return ! exit because we have started some communications therefore we need some time
- icount_faces = 0
- do imsg = 1,NUMMSGS_FACES
- if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
- if(myrank==iprocto_faces(imsg)) then
+ case( 6 )
- do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
- accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
- accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces)) &
- + buffer_received_faces_vector(1,ipoin2D,icount_faces)
- accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
- accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces)) &
- + buffer_received_faces_vector(2,ipoin2D,icount_faces)
- accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
- accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces)) &
- + buffer_received_faces_vector(3,ipoin2D,icount_faces)
+ ! receiver slices on chunk faces assemble values (iboolfaces)
+ ! and send values back to senders
+
+ ! checks if messages have been received
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocto_faces(imsg)) then
+ call MPI_TEST(request_receive_array(icount_faces),flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not received yet
+ endif
enddo
-! the buffer for the inner core starts right after the buffer for the crust and mantle
- ioffset = npoin2D_faces_crust_mantle(icount_faces)
-
- do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
- accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
- accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces)) + &
- buffer_received_faces_vector(1,ioffset + ipoin2D,icount_faces)
- accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
- accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces)) + &
- buffer_received_faces_vector(2,ioffset + ipoin2D,icount_faces)
- accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
- accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces)) + &
- buffer_received_faces_vector(3,ioffset + ipoin2D,icount_faces)
+ ! checks if messages have been sent out
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocfrom_faces(imsg)) then
+ call MPI_TEST(request_send_array(icount_faces),flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not sent yet
+ endif
enddo
- endif
- enddo
+ ! assembles values on chunk faces
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocto_faces(imsg)) then
-! *********************************************************************
-! transmit messages back in opposite direction (iprocto -> iprocfrom)
-! *********************************************************************
+ do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
+ accel_crust_mantle(:,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
+ accel_crust_mantle(:,iboolfaces_crust_mantle(ipoin2D,icount_faces)) &
+ + buffer_received_faces_vector(:,ipoin2D,icount_faces)
+ enddo
-!---- put slices in receive mode
-!---- a given slice can belong to at most two faces
+ ! the buffer for the inner core starts right after the buffer for the crust and mantle
+ ioffset = npoin2D_faces_crust_mantle(icount_faces)
- icount_faces = 0
- do imsg = 1,NUMMSGS_FACES
- if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
- if(myrank==iprocfrom_faces(imsg)) then
- sender = iprocto_faces(imsg)
+ do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
+ accel_inner_core(:,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
+ accel_inner_core(:,iboolfaces_inner_core(ipoin2D,icount_faces)) + &
+ buffer_received_faces_vector(:,ioffset + ipoin2D,icount_faces)
+ enddo
-! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
- npoin2D_chunks_all = npoin2D_faces_crust_mantle(icount_faces) + npoin2D_faces_inner_core(icount_faces)
+ endif
+ enddo
- call MPI_IRECV(buffer_received_faces_vector(1,1,icount_faces),NDIM*npoin2D_chunks_all,CUSTOM_MPI_TYPE,sender, &
- itag,MPI_COMM_WORLD,request_receive_array(icount_faces),ier)
+ ! *********************************************************************
+ ! transmit messages back in opposite direction (iprocto -> iprocfrom)
+ ! *********************************************************************
-! do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
-! accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = buffer_received_faces_vector(1,ipoin2D,icount_faces)
-! accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = buffer_received_faces_vector(2,ipoin2D,icount_faces)
-! accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = buffer_received_faces_vector(3,ipoin2D,icount_faces)
-! enddo
+ !---- put slices in receive mode
+ !---- a given slice can belong to at most two faces
-! the buffer for the inner core starts right after the buffer for the crust and mantle
-! ioffset = npoin2D_faces_crust_mantle(icount_faces)
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocfrom_faces(imsg)) then
+ sender = iprocto_faces(imsg)
-! do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
-! accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
-! buffer_received_faces_vector(1,ioffset + ipoin2D,icount_faces)
-! accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
-! buffer_received_faces_vector(2,ioffset + ipoin2D,icount_faces)
-! accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
-! buffer_received_faces_vector(3,ioffset + ipoin2D,icount_faces)
-! enddo
+ ! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
+ npoin2D_chunks_all = npoin2D_faces_crust_mantle(icount_faces) + npoin2D_faces_inner_core(icount_faces)
- endif
- enddo
+ call MPI_IRECV(buffer_received_faces_vector(1,1,icount_faces),NDIM*npoin2D_chunks_all,CUSTOM_MPI_TYPE,sender, &
+ itag,MPI_COMM_WORLD,request_receive_array(icount_faces),ier)
-!---- put slices in send mode
-!---- a given slice can belong to at most two faces
- icount_faces = 0
- do imsg = 1,NUMMSGS_FACES
- if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
- if(myrank==iprocto_faces(imsg)) then
- receiver = iprocfrom_faces(imsg)
+ ! do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
+ ! accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
+ ! buffer_received_faces_vector(1,ipoin2D,icount_faces)
+ ! accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
+ ! buffer_received_faces_vector(2,ipoin2D,icount_faces)
+ ! accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
+ ! buffer_received_faces_vector(3,ipoin2D,icount_faces)
+ ! enddo
-! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
- npoin2D_chunks_all = npoin2D_faces_crust_mantle(icount_faces) + npoin2D_faces_inner_core(icount_faces)
+ ! the buffer for the inner core starts right after the buffer for the crust and mantle
+ ! ioffset = npoin2D_faces_crust_mantle(icount_faces)
- do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
- buffer_send_faces_vector(1,ipoin2D,icount_faces) = &
- accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces))
- buffer_send_faces_vector(2,ipoin2D,icount_faces) = &
- accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces))
- buffer_send_faces_vector(3,ipoin2D,icount_faces) = &
- accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces))
+ ! do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
+ ! accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
+ ! buffer_received_faces_vector(1,ioffset + ipoin2D,icount_faces)
+ ! accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
+ ! buffer_received_faces_vector(2,ioffset + ipoin2D,icount_faces)
+ ! accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
+ ! buffer_received_faces_vector(3,ioffset + ipoin2D,icount_faces)
+ ! enddo
+
+ endif
enddo
-! the buffer for the inner core starts right after the buffer for the crust and mantle
- ioffset = npoin2D_faces_crust_mantle(icount_faces)
+ !---- put slices in send mode
+ !---- a given slice can belong to at most two faces
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocto_faces(imsg)) then
+ receiver = iprocfrom_faces(imsg)
- do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
- buffer_send_faces_vector(1,ioffset + ipoin2D,icount_faces) = &
- accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces))
- buffer_send_faces_vector(2,ioffset + ipoin2D,icount_faces) = &
- accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces))
- buffer_send_faces_vector(3,ioffset + ipoin2D,icount_faces) = &
- accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces))
- enddo
+ ! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
+ npoin2D_chunks_all = npoin2D_faces_crust_mantle(icount_faces) + npoin2D_faces_inner_core(icount_faces)
- call MPI_ISEND(buffer_send_faces_vector(1,1,icount_faces),NDIM*npoin2D_chunks_all,CUSTOM_MPI_TYPE,receiver,itag, &
- MPI_COMM_WORLD,request_send_array(icount_faces),ier)
- endif
- enddo
+ do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
+ buffer_send_faces_vector(:,ipoin2D,icount_faces) = &
+ accel_crust_mantle(:,iboolfaces_crust_mantle(ipoin2D,icount_faces))
+ enddo
- iphase = iphase + 1
- return ! exit because we have started some communications therefore we need some time
+ ! the buffer for the inner core starts right after the buffer for the crust and mantle
+ ioffset = npoin2D_faces_crust_mantle(icount_faces)
- endif !!!!!!!!! end of iphase 6
+ do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
+ buffer_send_faces_vector(:,ioffset + ipoin2D,icount_faces) = &
+ accel_inner_core(:,iboolfaces_inner_core(ipoin2D,icount_faces))
+ enddo
- if(iphase == 7) then
+ call MPI_ISEND(buffer_send_faces_vector(1,1,icount_faces),NDIM*npoin2D_chunks_all, &
+ CUSTOM_MPI_TYPE,receiver,itag, &
+ MPI_COMM_WORLD,request_send_array(icount_faces),ier)
+ endif
+ enddo
- icount_faces = 0
- do imsg = 1,NUMMSGS_FACES
- if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
- if(myrank==iprocto_faces(imsg)) then
- call MPI_TEST(request_send_array(icount_faces),flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not received yet
- endif
- enddo
+ iphase_comm = iphase_comm + 1
+ return ! exit because we have started some communications therefore we need some time
- icount_faces = 0
- do imsg = 1,NUMMSGS_FACES
- if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
- if(myrank==iprocfrom_faces(imsg)) then
- call MPI_TEST(request_receive_array(icount_faces),flag_result_test,msg_status,ier)
- if(.not. flag_result_test) return ! exit if message not sent yet
- endif
- enddo
+ case( 7 )
- icount_faces = 0
- do imsg = 1,NUMMSGS_FACES
- if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
- if(myrank==iprocfrom_faces(imsg)) then
- do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
- accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
- buffer_received_faces_vector(1,ipoin2D,icount_faces)
- accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
- buffer_received_faces_vector(2,ipoin2D,icount_faces)
- accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
- buffer_received_faces_vector(3,ipoin2D,icount_faces)
+ ! sender slices on chunk faces set values (iboolfaces)
+ ! and slices on corners assemble values on corners with other chunks
+
+ ! checks if messages have been sent
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocto_faces(imsg)) then
+ call MPI_TEST(request_send_array(icount_faces),flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not received yet
+ endif
enddo
-! the buffer for the inner core starts right after the buffer for the crust and mantle
- ioffset = npoin2D_faces_crust_mantle(icount_faces)
+ ! checks if messages have been received
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocfrom_faces(imsg)) then
+ call MPI_TEST(request_receive_array(icount_faces),flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not sent yet
+ endif
+ enddo
- do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
- accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
- buffer_received_faces_vector(1,ioffset + ipoin2D,icount_faces)
- accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
- buffer_received_faces_vector(2,ioffset + ipoin2D,icount_faces)
- accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
- buffer_received_faces_vector(3,ioffset + ipoin2D,icount_faces)
+ ! sets values on faces (iboolfaces)
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocfrom_faces(imsg)) then
+ do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
+ accel_crust_mantle(:,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
+ buffer_received_faces_vector(:,ipoin2D,icount_faces)
+ enddo
+
+ ! the buffer for the inner core starts right after the buffer for the crust and mantle
+ ioffset = npoin2D_faces_crust_mantle(icount_faces)
+
+ do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
+ accel_inner_core(:,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
+ buffer_received_faces_vector(:,ioffset + ipoin2D,icount_faces)
+ enddo
+ endif
enddo
- endif
- enddo
-! this is the exit condition, to go beyond the last phase number
- iphase = iphase + 1
+ ! this is the exit condition, to go beyond the last phase number
+ iphase_comm = iphase_comm + 1
-!! DK DK do the rest in blocking for now, for simplicity
+ !! DK DK do the rest in blocking for now, for simplicity
-!----
-!---- start MPI assembling corners
-!----
+ !----
+ !---- start MPI assembling corners
+ !----
-! scheme for corners cannot deadlock even if NPROC_XI = NPROC_ETA = 1
+ ! scheme for corners cannot deadlock even if NPROC_XI = NPROC_ETA = 1
-! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
- NGLOB1D_RADIAL_all = NGLOB1D_RADIAL_crust_mantle + NGLOB1D_RADIAL_inner_core
+ ! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
+ NGLOB1D_RADIAL_all = NGLOB1D_RADIAL_crust_mantle + NGLOB1D_RADIAL_inner_core
-! the buffer for the inner core starts right after the buffer for the crust and mantle
- ioffset = NGLOB1D_RADIAL_crust_mantle
+ ! the buffer for the inner core starts right after the buffer for the crust and mantle
+ ioffset = NGLOB1D_RADIAL_crust_mantle
-! ***************************************************************
-! transmit messages in forward direction (two workers -> master)
-! ***************************************************************
+ ! ***************************************************************
+ ! transmit messages in forward direction (two workers -> master)
+ ! ***************************************************************
- icount_corners = 0
+ icount_corners = 0
- do imsg = 1,NCORNERSCHUNKS
+ do imsg = 1,NCORNERSCHUNKS
- if(myrank == iproc_master_corners(imsg) .or. &
- myrank == iproc_worker1_corners(imsg) .or. &
- (NCHUNKS /= 2 .and. myrank == iproc_worker2_corners(imsg))) icount_corners = icount_corners + 1
+ if(myrank == iproc_master_corners(imsg) .or. &
+ myrank == iproc_worker1_corners(imsg) .or. &
+ (NCHUNKS /= 2 .and. myrank == iproc_worker2_corners(imsg))) icount_corners = icount_corners + 1
-!---- receive messages from the two workers on the master
- if(myrank==iproc_master_corners(imsg)) then
+ !---- receive messages from the two workers on the master
+ if(myrank==iproc_master_corners(imsg)) then
-! receive from worker #1 and add to local array
- sender = iproc_worker1_corners(imsg)
+ ! receive from worker #1 and add to local array
+ sender = iproc_worker1_corners(imsg)
- call MPI_RECV(buffer_recv_chunkcorn_vector,NDIM*NGLOB1D_RADIAL_all, &
- CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
+ call MPI_RECV(buffer_recv_chunkcorn_vector,NDIM*NGLOB1D_RADIAL_all, &
+ CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
- do ipoin1D = 1,NGLOB1D_RADIAL_crust_mantle
- accel_crust_mantle(1,iboolcorner_crust_mantle(ipoin1D,icount_corners)) = &
- accel_crust_mantle(1,iboolcorner_crust_mantle(ipoin1D,icount_corners)) + &
- buffer_recv_chunkcorn_vector(1,ipoin1D)
- accel_crust_mantle(2,iboolcorner_crust_mantle(ipoin1D,icount_corners)) = &
- accel_crust_mantle(2,iboolcorner_crust_mantle(ipoin1D,icount_corners)) + &
- buffer_recv_chunkcorn_vector(2,ipoin1D)
- accel_crust_mantle(3,iboolcorner_crust_mantle(ipoin1D,icount_corners)) = &
- accel_crust_mantle(3,iboolcorner_crust_mantle(ipoin1D,icount_corners)) + &
- buffer_recv_chunkcorn_vector(3,ipoin1D)
- enddo
+ do ipoin1D = 1,NGLOB1D_RADIAL_crust_mantle
+ accel_crust_mantle(:,iboolcorner_crust_mantle(ipoin1D,icount_corners)) = &
+ accel_crust_mantle(:,iboolcorner_crust_mantle(ipoin1D,icount_corners)) + &
+ buffer_recv_chunkcorn_vector(:,ipoin1D)
+ enddo
- do ipoin1D = 1,NGLOB1D_RADIAL_inner_core
- accel_inner_core(1,iboolcorner_inner_core(ipoin1D,icount_corners)) = &
- accel_inner_core(1,iboolcorner_inner_core(ipoin1D,icount_corners)) + &
- buffer_recv_chunkcorn_vector(1,ioffset + ipoin1D)
- accel_inner_core(2,iboolcorner_inner_core(ipoin1D,icount_corners)) = &
- accel_inner_core(2,iboolcorner_inner_core(ipoin1D,icount_corners)) + &
- buffer_recv_chunkcorn_vector(2,ioffset + ipoin1D)
- accel_inner_core(3,iboolcorner_inner_core(ipoin1D,icount_corners)) = &
- accel_inner_core(3,iboolcorner_inner_core(ipoin1D,icount_corners)) + &
- buffer_recv_chunkcorn_vector(3,ioffset + ipoin1D)
- enddo
+ do ipoin1D = 1,NGLOB1D_RADIAL_inner_core
+ accel_inner_core(:,iboolcorner_inner_core(ipoin1D,icount_corners)) = &
+ accel_inner_core(:,iboolcorner_inner_core(ipoin1D,icount_corners)) + &
+ buffer_recv_chunkcorn_vector(:,ioffset + ipoin1D)
+ enddo
-! receive from worker #2 and add to local array
- if(NCHUNKS /= 2) then
+ ! receive from worker #2 and add to local array
+ if(NCHUNKS /= 2) then
- sender = iproc_worker2_corners(imsg)
+ sender = iproc_worker2_corners(imsg)
- call MPI_RECV(buffer_recv_chunkcorn_vector,NDIM*NGLOB1D_RADIAL_all, &
- CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
+ call MPI_RECV(buffer_recv_chunkcorn_vector,NDIM*NGLOB1D_RADIAL_all, &
+ CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
- do ipoin1D = 1,NGLOB1D_RADIAL_crust_mantle
- accel_crust_mantle(1,iboolcorner_crust_mantle(ipoin1D,icount_corners)) = &
- accel_crust_mantle(1,iboolcorner_crust_mantle(ipoin1D,icount_corners)) + &
- buffer_recv_chunkcorn_vector(1,ipoin1D)
- accel_crust_mantle(2,iboolcorner_crust_mantle(ipoin1D,icount_corners)) = &
- accel_crust_mantle(2,iboolcorner_crust_mantle(ipoin1D,icount_corners)) + &
- buffer_recv_chunkcorn_vector(2,ipoin1D)
- accel_crust_mantle(3,iboolcorner_crust_mantle(ipoin1D,icount_corners)) = &
- accel_crust_mantle(3,iboolcorner_crust_mantle(ipoin1D,icount_corners)) + &
- buffer_recv_chunkcorn_vector(3,ipoin1D)
- enddo
+ do ipoin1D = 1,NGLOB1D_RADIAL_crust_mantle
+ accel_crust_mantle(:,iboolcorner_crust_mantle(ipoin1D,icount_corners)) = &
+ accel_crust_mantle(:,iboolcorner_crust_mantle(ipoin1D,icount_corners)) + &
+ buffer_recv_chunkcorn_vector(:,ipoin1D)
+ enddo
- do ipoin1D = 1,NGLOB1D_RADIAL_inner_core
- accel_inner_core(1,iboolcorner_inner_core(ipoin1D,icount_corners)) = &
- accel_inner_core(1,iboolcorner_inner_core(ipoin1D,icount_corners)) + &
- buffer_recv_chunkcorn_vector(1,ioffset + ipoin1D)
- accel_inner_core(2,iboolcorner_inner_core(ipoin1D,icount_corners)) = &
- accel_inner_core(2,iboolcorner_inner_core(ipoin1D,icount_corners)) + &
- buffer_recv_chunkcorn_vector(2,ioffset + ipoin1D)
- accel_inner_core(3,iboolcorner_inner_core(ipoin1D,icount_corners)) = &
- accel_inner_core(3,iboolcorner_inner_core(ipoin1D,icount_corners)) + &
- buffer_recv_chunkcorn_vector(3,ioffset + ipoin1D)
+ do ipoin1D = 1,NGLOB1D_RADIAL_inner_core
+ accel_inner_core(:,iboolcorner_inner_core(ipoin1D,icount_corners)) = &
+ accel_inner_core(:,iboolcorner_inner_core(ipoin1D,icount_corners)) + &
+ buffer_recv_chunkcorn_vector(:,ioffset + ipoin1D)
+ enddo
+
+ endif
+
+ endif
+
+ !---- send messages from the two workers to the master
+ if(myrank==iproc_worker1_corners(imsg) .or. &
+ (NCHUNKS /= 2 .and. myrank==iproc_worker2_corners(imsg))) then
+
+ receiver = iproc_master_corners(imsg)
+
+ do ipoin1D = 1,NGLOB1D_RADIAL_crust_mantle
+ buffer_send_chunkcorn_vector(:,ipoin1D) = &
+ accel_crust_mantle(:,iboolcorner_crust_mantle(ipoin1D,icount_corners))
+ enddo
+
+ do ipoin1D = 1,NGLOB1D_RADIAL_inner_core
+ buffer_send_chunkcorn_vector(:,ioffset + ipoin1D) = &
+ accel_inner_core(:,iboolcorner_inner_core(ipoin1D,icount_corners))
+ enddo
+
+ call MPI_SEND(buffer_send_chunkcorn_vector,NDIM*NGLOB1D_RADIAL_all, &
+ CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,ier)
+
+ endif
+
+ ! *********************************************************************
+ ! transmit messages back in opposite direction (master -> two workers)
+ ! *********************************************************************
+
+ !---- receive messages from the master on the two workers
+ if(myrank==iproc_worker1_corners(imsg) .or. &
+ (NCHUNKS /= 2 .and. myrank==iproc_worker2_corners(imsg))) then
+
+ ! receive from master and copy to local array
+ sender = iproc_master_corners(imsg)
+
+ call MPI_RECV(buffer_recv_chunkcorn_vector,NDIM*NGLOB1D_RADIAL_all, &
+ CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
+
+ do ipoin1D = 1,NGLOB1D_RADIAL_crust_mantle
+ accel_crust_mantle(:,iboolcorner_crust_mantle(ipoin1D,icount_corners)) = &
+ buffer_recv_chunkcorn_vector(:,ipoin1D)
+ enddo
+
+ do ipoin1D = 1,NGLOB1D_RADIAL_inner_core
+ accel_inner_core(:,iboolcorner_inner_core(ipoin1D,icount_corners)) = &
+ buffer_recv_chunkcorn_vector(:,ioffset + ipoin1D)
+ enddo
+
+ endif
+
+ !---- send messages from the master to the two workers
+ if(myrank==iproc_master_corners(imsg)) then
+
+ do ipoin1D = 1,NGLOB1D_RADIAL_crust_mantle
+ buffer_send_chunkcorn_vector(:,ipoin1D) = &
+ accel_crust_mantle(:,iboolcorner_crust_mantle(ipoin1D,icount_corners))
+ enddo
+
+ do ipoin1D = 1,NGLOB1D_RADIAL_inner_core
+ buffer_send_chunkcorn_vector(:,ioffset + ipoin1D) = &
+ accel_inner_core(:,iboolcorner_inner_core(ipoin1D,icount_corners))
+ enddo
+
+ ! send to worker #1
+ receiver = iproc_worker1_corners(imsg)
+ call MPI_SEND(buffer_send_chunkcorn_vector,NDIM*NGLOB1D_RADIAL_all,&
+ CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,ier)
+
+ ! send to worker #2
+ if(NCHUNKS /= 2) then
+ receiver = iproc_worker2_corners(imsg)
+ call MPI_SEND(buffer_send_chunkcorn_vector,NDIM*NGLOB1D_RADIAL_all,&
+ CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,ier)
+ endif
+
+ endif
+
enddo
+ end select
+
+ end subroutine assemble_MPI_vector
+
+!
+!-------------------------------------------------------------------------------------------------
+!-------------------------------------------------------------------------------------------------
+!-------------------------------------------------------------------------------------------------
+!
+!
+
+!daniel: todo - still local versions
+
+ subroutine assemble_MPI_vector_ext_mesh_s(NPROC,NGLOB_AB, &
+ array_val, &
+ buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ my_neighbours_ext_mesh, &
+ request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
+
+ ! sends data
+
+ implicit none
+
+ include "constants.h"
+
+ integer :: NPROC
+ integer :: NGLOB_AB
+
+ ! array to assemble
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: array_val
+
+ integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
+
+ real(kind=CUSTOM_REAL), dimension(NDIM,max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: &
+ buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh
+
+ integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh,my_neighbours_ext_mesh
+ integer, dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
+ integer, dimension(num_interfaces_ext_mesh) :: request_send_vector_ext_mesh,request_recv_vector_ext_mesh
+
+ integer ipoin,iinterface
+
+ ! here we have to assemble all the contributions between partitions using MPI
+
+ ! assemble only if more than one partition
+ if(NPROC > 1) then
+
+ ! partition border copy into the buffer
+ do iinterface = 1, num_interfaces_ext_mesh
+ do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
+ buffer_send_vector_ext_mesh(:,ipoin,iinterface) = &
+ array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface))
+ enddo
+ enddo
+
+ ! send messages
+ do iinterface = 1, num_interfaces_ext_mesh
+ call isend_cr(buffer_send_vector_ext_mesh(1,1,iinterface), &
+ NDIM*nibool_interfaces_ext_mesh(iinterface), &
+ my_neighbours_ext_mesh(iinterface), &
+ itag, &
+ request_send_vector_ext_mesh(iinterface) &
+ )
+ call irecv_cr(buffer_recv_vector_ext_mesh(1,1,iinterface), &
+ NDIM*nibool_interfaces_ext_mesh(iinterface), &
+ my_neighbours_ext_mesh(iinterface), &
+ itag, &
+ request_recv_vector_ext_mesh(iinterface) &
+ )
+ enddo
+
endif
+ end subroutine assemble_MPI_vector_ext_mesh_s
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+!daniel: TODO - cuda scalar assembly...
+
+! interrupt might improve MPI performance
+! see: https://computing.llnl.gov/tutorials/mpi_performance/#Sender-ReceiverSync
+!
+! check: MP_CSS_INTERRUPT environment variable on IBM systems
+
+
+ subroutine assemble_MPI_vector_send_cuda(NPROC, &
+ buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh, &
+ my_neighbours_ext_mesh, &
+ request_send_vector_ext_mesh,request_recv_vector_ext_mesh,&
+ IREGION,FORWARD_OR_ADJOINT)
+
+ ! sends data
+ ! note: array to assemble already filled into buffer_send_vector_ext_mesh array
+ use constants
+ use specfem_par,only: Mesh_pointer
+
+ implicit none
+
+ integer :: NPROC
+
+ integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
+
+ real(kind=CUSTOM_REAL), dimension(NDIM,max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: &
+ buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh
+
+ integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh,my_neighbours_ext_mesh
+ integer, dimension(num_interfaces_ext_mesh) :: request_send_vector_ext_mesh,request_recv_vector_ext_mesh
+
+ integer :: IREGION
+ integer :: FORWARD_OR_ADJOINT
+
+ ! local parameters
+ integer iinterface
+
+ ! send only if more than one partition
+ if(NPROC > 1) then
+
+ ! preparation of the contribution between partitions using MPI
+ ! transfers mpi buffers to CPU
+ call transfer_boun_accel_from_device(Mesh_pointer, &
+ buffer_send_vector_ext_mesh,&
+ IREGION,FORWARD_OR_ADJOINT)
+
+ ! send messages
+ do iinterface = 1, num_interfaces_ext_mesh
+ call isend_cr(buffer_send_vector_ext_mesh(1,1,iinterface), &
+ NDIM*nibool_interfaces_ext_mesh(iinterface), &
+ my_neighbours_ext_mesh(iinterface), &
+ itag, &
+ request_send_vector_ext_mesh(iinterface) &
+ )
+ call irecv_cr(buffer_recv_vector_ext_mesh(1,1,iinterface), &
+ NDIM*nibool_interfaces_ext_mesh(iinterface), &
+ my_neighbours_ext_mesh(iinterface), &
+ itag, &
+ request_recv_vector_ext_mesh(iinterface) &
+ )
+ enddo
+
endif
-!---- send messages from the two workers to the master
- if(myrank==iproc_worker1_corners(imsg) .or. &
- (NCHUNKS /= 2 .and. myrank==iproc_worker2_corners(imsg))) then
+ end subroutine assemble_MPI_vector_send_cuda
- receiver = iproc_master_corners(imsg)
+!
+!-------------------------------------------------------------------------------------------------
+!
- do ipoin1D = 1,NGLOB1D_RADIAL_crust_mantle
- buffer_send_chunkcorn_vector(1,ipoin1D) = &
- accel_crust_mantle(1,iboolcorner_crust_mantle(ipoin1D,icount_corners))
- buffer_send_chunkcorn_vector(2,ipoin1D) = &
- accel_crust_mantle(2,iboolcorner_crust_mantle(ipoin1D,icount_corners))
- buffer_send_chunkcorn_vector(3,ipoin1D) = &
- accel_crust_mantle(3,iboolcorner_crust_mantle(ipoin1D,icount_corners))
+ subroutine assemble_MPI_vector_ext_mesh_w(NPROC,NGLOB_AB, &
+ array_val, &
+ buffer_recv_vector_ext_mesh, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
+
+! waits for data to receive and assembles
+
+ implicit none
+
+ include "constants.h"
+
+ integer :: NPROC
+ integer :: NGLOB_AB
+
+ ! array to assemble
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: array_val
+
+ integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
+
+ real(kind=CUSTOM_REAL), dimension(NDIM,max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: &
+ buffer_recv_vector_ext_mesh
+
+ integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh
+ integer, dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
+ integer, dimension(num_interfaces_ext_mesh) :: request_send_vector_ext_mesh,request_recv_vector_ext_mesh
+
+ integer ipoin,iinterface
+
+! here we have to assemble all the contributions between partitions using MPI
+
+ ! assemble only if more than one partition
+ if(NPROC > 1) then
+
+ ! wait for communications completion (recv)
+ do iinterface = 1, num_interfaces_ext_mesh
+ call wait_req(request_recv_vector_ext_mesh(iinterface))
enddo
- do ipoin1D = 1,NGLOB1D_RADIAL_inner_core
- buffer_send_chunkcorn_vector(1,ioffset + ipoin1D) = &
- accel_inner_core(1,iboolcorner_inner_core(ipoin1D,icount_corners))
- buffer_send_chunkcorn_vector(2,ioffset + ipoin1D) = &
- accel_inner_core(2,iboolcorner_inner_core(ipoin1D,icount_corners))
- buffer_send_chunkcorn_vector(3,ioffset + ipoin1D) = &
- accel_inner_core(3,iboolcorner_inner_core(ipoin1D,icount_corners))
+ ! adding contributions of neighbours
+ do iinterface = 1, num_interfaces_ext_mesh
+ do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
+ array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface)) = &
+ array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface)) &
+ + buffer_recv_vector_ext_mesh(:,ipoin,iinterface)
+ enddo
enddo
- call MPI_SEND(buffer_send_chunkcorn_vector,NDIM*NGLOB1D_RADIAL_all,CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,ier)
+ ! wait for communications completion (send)
+ do iinterface = 1, num_interfaces_ext_mesh
+ call wait_req(request_send_vector_ext_mesh(iinterface))
+ enddo
endif
-! *********************************************************************
-! transmit messages back in opposite direction (master -> two workers)
-! *********************************************************************
+ end subroutine assemble_MPI_vector_ext_mesh_w
-!---- receive messages from the master on the two workers
- if(myrank==iproc_worker1_corners(imsg) .or. &
- (NCHUNKS /= 2 .and. myrank==iproc_worker2_corners(imsg))) then
-! receive from master and copy to local array
- sender = iproc_master_corners(imsg)
+!
+!-------------------------------------------------------------------------------------------------
+!
- call MPI_RECV(buffer_recv_chunkcorn_vector,NDIM*NGLOB1D_RADIAL_all, &
- CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
+ subroutine assemble_MPI_vector_write_cuda(NPROC, &
+ buffer_recv_vector_ext_mesh, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ request_send_vector_ext_mesh,request_recv_vector_ext_mesh, &
+ IREGION,FORWARD_OR_ADJOINT )
- do ipoin1D = 1,NGLOB1D_RADIAL_crust_mantle
- accel_crust_mantle(1,iboolcorner_crust_mantle(ipoin1D,icount_corners)) = &
- buffer_recv_chunkcorn_vector(1,ipoin1D)
- accel_crust_mantle(2,iboolcorner_crust_mantle(ipoin1D,icount_corners)) = &
- buffer_recv_chunkcorn_vector(2,ipoin1D)
- accel_crust_mantle(3,iboolcorner_crust_mantle(ipoin1D,icount_corners)) = &
- buffer_recv_chunkcorn_vector(3,ipoin1D)
+! waits for data to receive and assembles
+ use constants
+ use specfem_par,only: Mesh_pointer
+
+ implicit none
+
+ integer :: NPROC
+
+ integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
+
+ real(kind=CUSTOM_REAL), dimension(NDIM,max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: &
+ buffer_recv_vector_ext_mesh
+ integer, dimension(num_interfaces_ext_mesh) :: request_send_vector_ext_mesh,request_recv_vector_ext_mesh
+
+ integer :: IREGION
+ integer :: FORWARD_OR_ADJOINT
+
+ ! local parameters
+
+ integer iinterface
+
+! here we have to assemble all the contributions between partitions using MPI
+
+ ! assemble only if more than one partition
+ if(NPROC > 1) then
+
+ ! wait for communications completion (recv)
+ do iinterface = 1, num_interfaces_ext_mesh
+ call wait_req(request_recv_vector_ext_mesh(iinterface))
enddo
- do ipoin1D = 1,NGLOB1D_RADIAL_inner_core
- accel_inner_core(1,iboolcorner_inner_core(ipoin1D,icount_corners)) = &
- buffer_recv_chunkcorn_vector(1,ioffset + ipoin1D)
- accel_inner_core(2,iboolcorner_inner_core(ipoin1D,icount_corners)) = &
- buffer_recv_chunkcorn_vector(2,ioffset + ipoin1D)
- accel_inner_core(3,iboolcorner_inner_core(ipoin1D,icount_corners)) = &
- buffer_recv_chunkcorn_vector(3,ioffset + ipoin1D)
+ ! adding contributions of neighbours
+ call transfer_asmbl_accel_to_device(Mesh_pointer, &
+ buffer_recv_vector_ext_mesh, &
+ IREGION,FORWARD_OR_ADJOINT)
+
+ ! This step is done via previous function transfer_and_assemble...
+ ! do iinterface = 1, num_interfaces_ext_mesh
+ ! do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
+ ! array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface)) = &
+ ! array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface)) + buffer_recv_vector_ext_mesh(:,ipoin,iinterface)
+ ! enddo
+ ! enddo
+
+ ! wait for communications completion (send)
+ do iinterface = 1, num_interfaces_ext_mesh
+ call wait_req(request_send_vector_ext_mesh(iinterface))
enddo
endif
-!---- send messages from the master to the two workers
- if(myrank==iproc_master_corners(imsg)) then
+ end subroutine assemble_MPI_vector_write_cuda
- do ipoin1D = 1,NGLOB1D_RADIAL_crust_mantle
- buffer_send_chunkcorn_vector(1,ipoin1D) = &
- accel_crust_mantle(1,iboolcorner_crust_mantle(ipoin1D,icount_corners))
- buffer_send_chunkcorn_vector(2,ipoin1D) = &
- accel_crust_mantle(2,iboolcorner_crust_mantle(ipoin1D,icount_corners))
- buffer_send_chunkcorn_vector(3,ipoin1D) = &
- accel_crust_mantle(3,iboolcorner_crust_mantle(ipoin1D,icount_corners))
- enddo
+!
+!----
+!
- do ipoin1D = 1,NGLOB1D_RADIAL_inner_core
- buffer_send_chunkcorn_vector(1,ioffset + ipoin1D) = &
- accel_inner_core(1,iboolcorner_inner_core(ipoin1D,icount_corners))
- buffer_send_chunkcorn_vector(2,ioffset + ipoin1D) = &
- accel_inner_core(2,iboolcorner_inner_core(ipoin1D,icount_corners))
- buffer_send_chunkcorn_vector(3,ioffset + ipoin1D) = &
- accel_inner_core(3,iboolcorner_inner_core(ipoin1D,icount_corners))
- enddo
+ subroutine irecv_cr(recvbuf, recvcount, dest, recvtag, req)
-! send to worker #1
- receiver = iproc_worker1_corners(imsg)
- call MPI_SEND(buffer_send_chunkcorn_vector,NDIM*NGLOB1D_RADIAL_all,CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,ier)
+ implicit none
-! send to worker #2
- if(NCHUNKS /= 2) then
- receiver = iproc_worker2_corners(imsg)
- call MPI_SEND(buffer_send_chunkcorn_vector,NDIM*NGLOB1D_RADIAL_all,CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,ier)
+! standard include of the MPI library
+ include 'mpif.h'
- endif
+ include "constants.h"
+ include "precision.h"
- endif
+ integer recvcount, dest, recvtag, req
+ real(kind=CUSTOM_REAL), dimension(recvcount) :: recvbuf
- enddo
+ integer ier
- endif !!!!!!!!! end of iphase 7
+ call MPI_IRECV(recvbuf(1),recvcount,CUSTOM_MPI_TYPE,dest,recvtag, &
+ MPI_COMM_WORLD,req,ier)
- end subroutine assemble_MPI_vector
+ end subroutine irecv_cr
+!
+!----
+!
+
+ subroutine isend_cr(sendbuf, sendcount, dest, sendtag, req)
+
+ implicit none
+
+! standard include of the MPI library
+ include 'mpif.h'
+
+ include "constants.h"
+ include "precision.h"
+
+ integer sendcount, dest, sendtag, req
+ real(kind=CUSTOM_REAL), dimension(sendcount) :: sendbuf
+
+ integer ier
+
+ call MPI_ISEND(sendbuf(1),sendcount,CUSTOM_MPI_TYPE,dest,sendtag, &
+ MPI_COMM_WORLD,req,ier)
+
+ end subroutine isend_cr
+
+!
+!----
+!
+
+ subroutine wait_req(req)
+
+ implicit none
+
+! standard include of the MPI library
+ include 'mpif.h'
+
+ integer :: req
+
+ integer, dimension(MPI_STATUS_SIZE) :: req_mpi_status
+
+ integer :: ier
+
+ call mpi_wait(req,req_mpi_status,ier)
+
+ end subroutine wait_req
+
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/check_simulation_stability.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/check_simulation_stability.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/check_simulation_stability.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -27,7 +27,9 @@
subroutine check_simulation_stability(it,displ_crust_mantle,displ_inner_core,displ_outer_core, &
b_displ_crust_mantle,b_displ_inner_core,b_displ_outer_core, &
- eps_trace_over_3_crust_mantle,epsilondev_crust_mantle, &
+ eps_trace_over_3_crust_mantle, &
+ epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
+ epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle, &
SIMULATION_TYPE,OUTPUT_FILES,time_start,DT,t0,NSTEP, &
myrank) !COMPUTE_AND_STORE_STRAIN,myrank)
@@ -52,9 +54,13 @@
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STRAIN_ONLY) :: &
eps_trace_over_3_crust_mantle
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_OR_ATT) :: &
- epsilondev_crust_mantle
+! real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_OR_ATT) :: &
+! epsilondev_crust_mantle
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_OR_ATT) :: &
+ epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
+ epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle
+
integer SIMULATION_TYPE
character(len=150) OUTPUT_FILES
@@ -124,7 +130,9 @@
if (COMPUTE_AND_STORE_STRAIN) then
Strain_norm = maxval(abs(eps_trace_over_3_crust_mantle))
- strain2_norm= maxval(abs(epsilondev_crust_mantle))
+ strain2_norm= max( maxval(abs(epsilondev_xx_crust_mantle)),maxval(abs(epsilondev_yy_crust_mantle)), &
+ maxval(abs(epsilondev_xy_crust_mantle)),maxval(abs(epsilondev_xz_crust_mantle)), &
+ maxval(abs(epsilondev_yz_crust_mantle)) )
call MPI_REDUCE(Strain_norm,Strain_norm_all,1,CUSTOM_MPI_TYPE,MPI_MAX,0, &
MPI_COMM_WORLD,ier)
call MPI_REDUCE(Strain2_norm,Strain2_norm_all,1,CUSTOM_MPI_TYPE,MPI_MAX,0, &
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_add_sources.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_add_sources.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_add_sources.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -274,10 +274,10 @@
! the file at NSTEP corresponds to -t0 + (NSTEP-1)*DT
!
! the same time step is saved for the forward wavefields to reconstruct them;
- ! however, the Newark time scheme acts at the very beginning of this time loop
+ ! however, the Newmark time scheme acts at the very beginning of this time loop
! such that we have the backward/reconstructed wavefield updated by
! a single time step into the direction -DT and b_displ(it=1) would correspond to -t0 + (NSTEP-1)*DT - DT
- ! after the Newark (predictor) time step update.
+ ! after the Newmark (predictor) time step update.
! however, we will read the backward/reconstructed wavefield at the end of the first time loop,
! such that b_displ(it=1) corresponds to -t0 + (NSTEP-1)*DT (which is the one saved in the files).
!
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_arrays_source.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_arrays_source.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_arrays_source.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -183,7 +183,7 @@
it_end = iadjsrc(it_sub_adj,1)+NSTEP_BLOCK-1
- ! unfortunately, things become more tricky because of the Newark time scheme at
+ ! unfortunately, things become more tricky because of the Newmark time scheme at
! the very beginning of the time loop. however, when we read in the backward/reconstructed
! wavefields at the end of the first time loop, we can use the adjoint source index from 3000 down to 1.
!
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_element.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_element.F90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_element.F90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -30,12 +30,6 @@
! or compile with: -D_HANDOPT
!#define _HANDOPT
-! BEWARE:
-! BEWARE: we have observed that using _HANDOPT in combination with -O3 or higher can lead to problems on some machines;
-! BEWARE: thus, be careful when using it. At the very least, run the same test simulation once with _HANDOPT and once without
-! BEWARE: and make sure that all the seismograms you get are the same down to roundoff noise.
-! BEWARE:
-
! note: these hand optimizations should help compilers to pipeline the code and make better use of the cache;
! depending on compilers, it can further decrease the computation time by ~ 30%.
! the original routines are commented with "! way 1", the hand-optimized routines with "! way 2"
@@ -47,7 +41,8 @@
wgll_cube, &
kappavstore,muvstore, &
ibool, &
- R_memory,epsilon_trace_over_3, &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ epsilon_trace_over_3, &
one_minus_sum_beta,vx,vy,vz,vnspec, &
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
dummyx_loc,dummyy_loc,dummyz_loc,epsilondev_loc,rho_s_H)
@@ -86,7 +81,11 @@
! memory variables for attenuation
! memory variables R_ij are stored at the local rather than global level
! to allow for optimization of cache access by compiler
- real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUAT) :: R_memory
+! real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUAT) :: R_memory
+
+ real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUAT) :: &
+ R_xx,R_yy,R_xy,R_xz,R_yz
+
real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: epsilon_trace_over_3
integer :: vx,vy,vz,vnspec
@@ -219,8 +218,12 @@
if(ATTENUATION_VAL .and. ( USE_ATTENUATION_MIMIC .eqv. .false. ) ) then
! note: fortran passes pointers to array location, thus R_xx(1,...), R_yy(1,...), etc. should be fine
- call compute_element_att_stress( R_memory(1,1,i,j,k,ispec), &
- sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz)
+ call compute_element_att_stress(R_xx(1,i,j,k,ispec), &
+ R_yy(1,i,j,k,ispec), &
+ R_xy(1,i,j,k,ispec), &
+ R_xz(1,i,j,k,ispec), &
+ R_yz(1,i,j,k,ispec), &
+ sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz)
endif ! ATTENUATION_VAL
@@ -370,7 +373,8 @@
wgll_cube, &
kappavstore,kappahstore,muvstore,muhstore,eta_anisostore, &
ibool, &
- R_memory,epsilon_trace_over_3, &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ epsilon_trace_over_3, &
one_minus_sum_beta,vx,vy,vz,vnspec, &
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
dummyx_loc,dummyy_loc,dummyz_loc,epsilondev_loc,rho_s_H)
@@ -411,7 +415,10 @@
! memory variables for attenuation
! memory variables R_ij are stored at the local rather than global level
! to allow for optimization of cache access by compiler
- real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUAT) :: R_memory
+! real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUAT) :: R_memory
+ real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUAT) :: &
+ R_xx,R_yy,R_xy,R_xz,R_yz
+
real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: epsilon_trace_over_3
integer vx,vy,vz,vnspec
@@ -753,8 +760,12 @@
if(ATTENUATION_VAL .and. ( USE_ATTENUATION_MIMIC .eqv. .false. ) ) then
! note: fortran passes pointers to array location, thus R_xx(1,...), R_yy(1,...), etc. should be fine
- call compute_element_att_stress( R_memory(1,1,i,j,k,ispec), &
- sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz)
+ call compute_element_att_stress(R_xx(1,i,j,k,ispec), &
+ R_yy(1,i,j,k,ispec), &
+ R_xy(1,i,j,k,ispec), &
+ R_xz(1,i,j,k,ispec), &
+ R_yz(1,i,j,k,ispec), &
+ sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz)
endif ! ATTENUATION_VAL
@@ -903,7 +914,8 @@
c23store,c24store,c25store,c26store,c33store,c34store,c35store, &
c36store,c44store,c45store,c46store,c55store,c56store,c66store, &
ibool, &
- R_memory,epsilon_trace_over_3, &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ epsilon_trace_over_3, &
one_minus_sum_beta,vx,vy,vz,vnspec, &
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
dummyx_loc,dummyy_loc,dummyz_loc,epsilondev_loc,rho_s_H)
@@ -944,7 +956,10 @@
! memory variables for attenuation
! memory variables R_ij are stored at the local rather than global level
! to allow for optimization of cache access by compiler
- real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUAT) :: R_memory
+! real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUAT) :: R_memory
+ real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUAT) :: &
+ R_xx,R_yy,R_xy,R_xz,R_yz
+
real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: epsilon_trace_over_3
integer vx,vy,vz,vnspec
@@ -1116,8 +1131,12 @@
if(ATTENUATION_VAL .and. ( USE_ATTENUATION_MIMIC .eqv. .false. ) ) then
! note: fortran passes pointers to array location, thus R_xx(1,...), R_yy(1,...), etc. should be fine
- call compute_element_att_stress( R_memory(1,1,i,j,k,ispec), &
- sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz)
+ call compute_element_att_stress(R_xx(1,i,j,k,ispec), &
+ R_yy(1,i,j,k,ispec), &
+ R_xy(1,i,j,k,ispec), &
+ R_xz(1,i,j,k,ispec), &
+ R_yz(1,i,j,k,ispec), &
+ sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz)
endif ! ATTENUATION_VAL
@@ -1254,8 +1273,8 @@
!
- subroutine compute_element_att_stress( R_memory_loc, &
- sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz)
+ subroutine compute_element_att_stress(R_xx_loc,R_yy_loc,R_xy_loc,R_xz_loc,R_yz_loc, &
+ sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz)
implicit none
@@ -1269,19 +1288,25 @@
! memory variables for attenuation
! memory variables R_ij are stored at the local rather than global level
! to allow for optimization of cache access by compiler
- real(kind=CUSTOM_REAL), dimension(5,N_SLS) :: R_memory_loc
+! real(kind=CUSTOM_REAL), dimension(5,N_SLS) :: R_memory_loc
+ real(kind=CUSTOM_REAL), dimension(N_SLS) :: R_xx_loc
+ real(kind=CUSTOM_REAL), dimension(N_SLS) :: R_yy_loc
+ real(kind=CUSTOM_REAL), dimension(N_SLS) :: R_xy_loc
+ real(kind=CUSTOM_REAL), dimension(N_SLS) :: R_xz_loc
+ real(kind=CUSTOM_REAL), dimension(N_SLS) :: R_yz_loc
+
real(kind=CUSTOM_REAL) sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz
! local parameters
real(kind=CUSTOM_REAL) R_xx_val1,R_yy_val1
integer :: i_SLS
-#ifdef _HANDOPT
+#ifdef __HANDOPT_ATT
real(kind=CUSTOM_REAL) R_xx_val2,R_yy_val2,R_xx_val3,R_yy_val3
integer :: imodulo_N_SLS
integer :: i_SLS1,i_SLS2
#endif
-#ifdef _HANDOPT
+#ifdef __HANDOPT_ATT
! way 2:
! note: this should help compilers to pipeline the code and make better use of the cache;
! depending on compilers, it can further decrease the computation time by ~ 30%.
@@ -1290,61 +1315,61 @@
if(imodulo_N_SLS >= 1) then
do i_SLS = 1,imodulo_N_SLS
- R_xx_val1 = R_memory_loc(1,i_SLS)
- R_yy_val1 = R_memory_loc(2,i_SLS)
+ R_xx_val1 = R_xx_loc(i_SLS)
+ R_yy_val1 = R_yy_loc(i_SLS)
sigma_xx = sigma_xx - R_xx_val1
sigma_yy = sigma_yy - R_yy_val1
sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1
- sigma_xy = sigma_xy - R_memory_loc(3,i_SLS)
- sigma_xz = sigma_xz - R_memory_loc(4,i_SLS)
- sigma_yz = sigma_yz - R_memory_loc(5,i_SLS)
+ sigma_xy = sigma_xy - R_xy_loc(i_SLS)
+ sigma_xz = sigma_xz - R_xz_loc(i_SLS)
+ sigma_yz = sigma_yz - R_yz_loc(i_SLS)
enddo
endif
if(N_SLS >= imodulo_N_SLS+1) then
! note: another possibility would be using a reduction example for this loop; was tested but it does not improve,
! probably since N_SLS == 3 is too small for a loop benefit
do i_SLS = imodulo_N_SLS+1,N_SLS,3
- R_xx_val1 = R_memory_loc(1,i_SLS)
- R_yy_val1 = R_memory_loc(2,i_SLS)
+ R_xx_val1 = R_xx_loc(i_SLS)
+ R_yy_val1 = R_yy_loc(i_SLS)
sigma_xx = sigma_xx - R_xx_val1
sigma_yy = sigma_yy - R_yy_val1
sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1
- sigma_xy = sigma_xy - R_memory_loc(3,i_SLS)
- sigma_xz = sigma_xz - R_memory_loc(4,i_SLS)
- sigma_yz = sigma_yz - R_memory_loc(5,i_SLS)
+ sigma_xy = sigma_xy - R_xy_loc(i_SLS)
+ sigma_xz = sigma_xz - R_xz_loc(i_SLS)
+ sigma_yz = sigma_yz - R_yz_loc(i_SLS)
i_SLS1=i_SLS+1
- R_xx_val2 = R_memory_loc(1,i_SLS1)
- R_yy_val2 = R_memory_loc(2,i_SLS1)
+ R_xx_val2 = R_xx_loc(i_SLS1)
+ R_yy_val2 = R_yy_loc(i_SLS1)
sigma_xx = sigma_xx - R_xx_val2
sigma_yy = sigma_yy - R_yy_val2
sigma_zz = sigma_zz + R_xx_val2 + R_yy_val2
- sigma_xy = sigma_xy - R_memory_loc(3,i_SLS1)
- sigma_xz = sigma_xz - R_memory_loc(4,i_SLS1)
- sigma_yz = sigma_yz - R_memory_loc(5,i_SLS1)
+ sigma_xy = sigma_xy - R_xy_loc(i_SLS1)
+ sigma_xz = sigma_xz - R_xz_loc(i_SLS1)
+ sigma_yz = sigma_yz - R_yz_loc(i_SLS1)
i_SLS2 =i_SLS+2
- R_xx_val3 = R_memory_loc(1,i_SLS2)
- R_yy_val3 = R_memory_loc(2,i_SLS2)
+ R_xx_val3 = R_xx_loc(i_SLS2)
+ R_yy_val3 = R_yy_loc(i_SLS2)
sigma_xx = sigma_xx - R_xx_val3
sigma_yy = sigma_yy - R_yy_val3
sigma_zz = sigma_zz + R_xx_val3 + R_yy_val3
- sigma_xy = sigma_xy - R_memory_loc(3,i_SLS2)
- sigma_xz = sigma_xz - R_memory_loc(4,i_SLS2)
- sigma_yz = sigma_yz - R_memory_loc(5,i_SLS2)
+ sigma_xy = sigma_xy - R_xy_loc(i_SLS2)
+ sigma_xz = sigma_xz - R_xz_loc(i_SLS2)
+ sigma_yz = sigma_yz - R_yz_loc(i_SLS2)
enddo
endif
#else
! way 1:
do i_SLS = 1,N_SLS
- R_xx_val1 = R_memory_loc(1,i_SLS) ! R_memory(1,i_SLS,i,j,k,ispec)
- R_yy_val1 = R_memory_loc(2,i_SLS) ! R_memory(2,i_SLS,i,j,k,ispec)
+ R_xx_val1 = R_xx_loc(i_SLS) ! R_memory(1,i_SLS,i,j,k,ispec)
+ R_yy_val1 = R_yy_loc(i_SLS) ! R_memory(2,i_SLS,i,j,k,ispec)
sigma_xx = sigma_xx - R_xx_val1
sigma_yy = sigma_yy - R_yy_val1
sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1
- sigma_xy = sigma_xy - R_memory_loc(3,i_SLS) ! R_memory(3,i_SLS,i,j,k,ispec)
- sigma_xz = sigma_xz - R_memory_loc(4,i_SLS) ! R_memory(4,i_SLS,i,j,k,ispec)
- sigma_yz = sigma_yz - R_memory_loc(5,i_SLS) ! R_memory(5,i_SLS,i,j,k,ispec)
+ sigma_xy = sigma_xy - R_xy_loc(i_SLS) ! R_memory(3,i_SLS,i,j,k,ispec)
+ sigma_xz = sigma_xz - R_xz_loc(i_SLS) ! R_memory(4,i_SLS,i,j,k,ispec)
+ sigma_yz = sigma_yz - R_yz_loc(i_SLS) ! R_memory(5,i_SLS,i,j,k,ispec)
enddo
#endif
@@ -1355,11 +1380,13 @@
!--------------------------------------------------------------------------------------------
!
- subroutine compute_element_att_memory_cr(ispec,R_memory, &
+ subroutine compute_element_att_memory_cr(ispec,R_xx,R_yy,R_xy,R_xz,R_yz, &
vx,vy,vz,vnspec,factor_common, &
alphaval,betaval,gammaval, &
c44store,muvstore, &
- epsilondev,epsilondev_loc)
+ epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+ epsilondev_xz,epsilondev_yz, &
+ epsilondev_loc)
! crust mantle
! update memory variables based upon the Runge-Kutta scheme
@@ -1388,7 +1415,9 @@
! element id
integer :: ispec
- real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUAT) :: R_memory
+! real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUAT) :: R_memory
+ real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUAT) :: &
+ R_xx,R_yy,R_xy,R_xz,R_yz
integer :: vx,vy,vz,vnspec
real(kind=CUSTOM_REAL), dimension(N_SLS, vx, vy, vz, vnspec) :: factor_common
@@ -1397,18 +1426,19 @@
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ANISO_MANTLE) :: c44store
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ISO_MANTLE) :: muvstore
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: epsilondev
+! real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: epsilondev
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: &
+ epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz
+
real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
! local parameters
real(kind=CUSTOM_REAL), dimension(NGLLX, NGLLY, NGLLZ) :: factor_common_c44_muv
integer :: i_SLS
-#ifdef _HANDOPT
+#ifdef __HANDOPT_ATT
real(kind=CUSTOM_REAL) :: alphal,betal,gammal
integer :: i,j,k
-#else
- integer :: i_memory
#endif
! use Runge-Kutta scheme to march in time
@@ -1417,7 +1447,7 @@
! IMPROVE we use mu_v here even if there is some anisotropy
! IMPROVE we should probably use an average value instead
-#ifdef _HANDOPT
+#ifdef __HANDOPT_ATT
! way 2:
do i_SLS = 1,N_SLS
@@ -1438,9 +1468,31 @@
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
- R_memory(:,i_SLS,i,j,k,ispec) = alphal * R_memory(:,i_SLS,i,j,k,ispec) &
+! R_memory(:,i_SLS,i,j,k,ispec) = alphal * R_memory(:,i_SLS,i,j,k,ispec) &
+! + factor_common_c44_muv(i,j,k) &
+! *( betal * epsilondev(:,i,j,k,ispec) + gammal * epsilondev_loc(:,i,j,k))
+
+ R_xx(i_SLS,i,j,k,ispec) = alphal * R_xx(i_SLS,i,j,k,ispec) &
+ factor_common_c44_muv(i,j,k) &
- *( betal * epsilondev(:,i,j,k,ispec) + gammal * epsilondev_loc(:,i,j,k))
+ *( betal * epsilondev_xx(i,j,k,ispec) + gammal * epsilondev_loc(1,i,j,k))
+
+ R_yy(i_SLS,i,j,k,ispec) = alphal * R_yy(i_SLS,i,j,k,ispec) &
+ + factor_common_c44_muv(i,j,k) &
+ *( betal * epsilondev_yy(i,j,k,ispec) + gammal * epsilondev_loc(2,i,j,k))
+
+ R_xy(i_SLS,i,j,k,ispec) = alphal * R_xy(i_SLS,i,j,k,ispec) &
+ + factor_common_c44_muv(i,j,k) &
+ *( betal * epsilondev_xy(i,j,k,ispec) + gammal * epsilondev_loc(3,i,j,k))
+
+ R_xz(i_SLS,i,j,k,ispec) = alphal * R_xz(i_SLS,i,j,k,ispec) &
+ + factor_common_c44_muv(i,j,k) &
+ *( betal * epsilondev_xz(i,j,k,ispec) + gammal * epsilondev_loc(4,i,j,k))
+
+ R_yz(i_SLS,i,j,k,ispec) = alphal * R_yz(i_SLS,i,j,k,ispec) &
+ + factor_common_c44_muv(i,j,k) &
+ *( betal * epsilondev_yz(i,j,k,ispec) + gammal * epsilondev_loc(5,i,j,k))
+
+
enddo
enddo
enddo
@@ -1457,11 +1509,33 @@
factor_common_c44_muv(:,:,:) = factor_common_c44_muv(:,:,:) * muvstore(:,:,:,ispec)
endif
- do i_memory = 1,5
- R_memory(i_memory,i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_memory(i_memory,i_SLS,:,:,:,ispec) &
+! do i_memory = 1,5
+! R_memory(i_memory,i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_memory(i_memory,i_SLS,:,:,:,ispec) &
+! + factor_common_c44_muv(:,:,:) &
+! * (betaval(i_SLS) * epsilondev(i_memory,:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(i_memory,:,:,:))
+! enddo
+
+ R_xx(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_xx(i_SLS,:,:,:,ispec) &
+ factor_common_c44_muv(:,:,:) &
- * (betaval(i_SLS) * epsilondev(i_memory,:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(i_memory,:,:,:))
- enddo
+ * (betaval(i_SLS) * epsilondev_xx(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(1,:,:,:))
+
+ R_yy(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_yy(i_SLS,:,:,:,ispec) &
+ + factor_common_c44_muv(:,:,:) &
+ * (betaval(i_SLS) * epsilondev_yy(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(2,:,:,:))
+
+ R_xy(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_xy(i_SLS,:,:,:,ispec) &
+ + factor_common_c44_muv(:,:,:) &
+ * (betaval(i_SLS) * epsilondev_xy(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(3,:,:,:))
+
+ R_xz(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_xz(i_SLS,:,:,:,ispec) &
+ + factor_common_c44_muv(:,:,:) &
+ * (betaval(i_SLS) * epsilondev_xz(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(4,:,:,:))
+
+ R_yz(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_yz(i_SLS,:,:,:,ispec) &
+ + factor_common_c44_muv(:,:,:) &
+ * (betaval(i_SLS) * epsilondev_yz(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(5,:,:,:))
+
+
enddo ! i_SLS
#endif
@@ -1472,11 +1546,13 @@
!--------------------------------------------------------------------------------------------
!
- subroutine compute_element_att_memory_ic(ispec,R_memory, &
+ subroutine compute_element_att_memory_ic(ispec,R_xx,R_yy,R_xy,R_xz,R_yz, &
vx,vy,vz,vnspec,factor_common, &
alphaval,betaval,gammaval, &
muvstore, &
- epsilondev,epsilondev_loc)
+ epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+ epsilondev_xz,epsilondev_yz, &
+ epsilondev_loc)
! inner core
! update memory variables based upon the Runge-Kutta scheme
@@ -1505,15 +1581,21 @@
! element id
integer :: ispec
- real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ATTENUATION) :: R_memory
+! real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ATTENUATION) :: R_memory
+ real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ATTENUATION) :: &
+ R_xx,R_yy,R_xy,R_xz,R_yz
+
integer :: vx,vy,vz,vnspec
real(kind=CUSTOM_REAL), dimension(N_SLS, vx, vy, vz, vnspec) :: factor_common
real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval,betaval,gammaval
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: muvstore
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: epsilondev
+! real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: epsilondev
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: &
+ epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz
+
real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
! local parameters
@@ -1521,11 +1603,9 @@
integer :: i_SLS
-#ifdef _HANDOPT
+#ifdef __HANDOPT_ATT
real(kind=CUSTOM_REAL) :: alphal,betal,gammal
integer :: i,j,k
-#else
- integer :: i_memory
#endif
! use Runge-Kutta scheme to march in time
@@ -1534,7 +1614,7 @@
! IMPROVE we use mu_v here even if there is some anisotropy
! IMPROVE we should probably use an average value instead
-#ifdef _HANDOPT
+#ifdef __HANDOPT_ATT
! way 2:
do i_SLS = 1,N_SLS
@@ -1551,9 +1631,26 @@
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
- R_memory(:,i_SLS,i,j,k,ispec) = alphal * R_memory(:,i_SLS,i,j,k,ispec) &
+! R_memory(:,i_SLS,i,j,k,ispec) = alphal * R_memory(:,i_SLS,i,j,k,ispec) &
+! + factor_common_use(i,j,k) &
+! *( betal * epsilondev(:,i,j,k,ispec) + gammal * epsilondev_loc(:,i,j,k))
+
+ R_xx(i_SLS,i,j,k,ispec) = alphal * R_xx(i_SLS,i,j,k,ispec) &
+ factor_common_use(i,j,k) &
- *( betal * epsilondev(:,i,j,k,ispec) + gammal * epsilondev_loc(:,i,j,k))
+ *( betal * epsilondev_xx(i,j,k,ispec) + gammal * epsilondev_loc(1,i,j,k))
+ R_yy(i_SLS,i,j,k,ispec) = alphal * R_yy(i_SLS,i,j,k,ispec) &
+ + factor_common_use(i,j,k) &
+ *( betal * epsilondev_yy(i,j,k,ispec) + gammal * epsilondev_loc(2,i,j,k))
+ R_xy(i_SLS,i,j,k,ispec) = alphal * R_xy(i_SLS,i,j,k,ispec) &
+ + factor_common_use(i,j,k) &
+ *( betal * epsilondev_xy(i,j,k,ispec) + gammal * epsilondev_loc(3,i,j,k))
+ R_xz(i_SLS,i,j,k,ispec) = alphal * R_xz(i_SLS,i,j,k,ispec) &
+ + factor_common_use(i,j,k) &
+ *( betal * epsilondev_xz(i,j,k,ispec) + gammal * epsilondev_loc(4,i,j,k))
+ R_yz(i_SLS,i,j,k,ispec) = alphal * R_yz(i_SLS,i,j,k,ispec) &
+ + factor_common_use(i,j,k) &
+ *( betal * epsilondev_yz(i,j,k,ispec) + gammal * epsilondev_loc(5,i,j,k))
+
enddo
enddo
enddo
@@ -1563,11 +1660,32 @@
! way 1:
do i_SLS = 1,N_SLS
factor_common_use(:,:,:) = factor_common(i_SLS,:,:,:,ispec)
- do i_memory = 1,5
- R_memory(i_memory,i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_memory(i_memory,i_SLS,:,:,:,ispec) &
+! do i_memory = 1,5
+! R_memory(i_memory,i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_memory(i_memory,i_SLS,:,:,:,ispec) &
+! + muvstore(:,:,:,ispec) * factor_common_use(:,:,:) * &
+! (betaval(i_SLS) * epsilondev(i_memory,:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(i_memory,:,:,:))
+! enddo
+
+ R_xx(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_xx(i_SLS,:,:,:,ispec) &
+ muvstore(:,:,:,ispec) * factor_common_use(:,:,:) * &
- (betaval(i_SLS) * epsilondev(i_memory,:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(i_memory,:,:,:))
- enddo
+ (betaval(i_SLS) * epsilondev_xx(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(1,:,:,:))
+
+ R_yy(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_yy(i_SLS,:,:,:,ispec) &
+ + muvstore(:,:,:,ispec) * factor_common_use(:,:,:) * &
+ (betaval(i_SLS) * epsilondev_yy(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(2,:,:,:))
+
+ R_xy(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_xy(i_SLS,:,:,:,ispec) &
+ + muvstore(:,:,:,ispec) * factor_common_use(:,:,:) * &
+ (betaval(i_SLS) * epsilondev_xy(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(3,:,:,:))
+
+ R_xz(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_xz(i_SLS,:,:,:,ispec) &
+ + muvstore(:,:,:,ispec) * factor_common_use(:,:,:) * &
+ (betaval(i_SLS) * epsilondev_xz(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(4,:,:,:))
+
+ R_yz(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_yz(i_SLS,:,:,:,ispec) &
+ + muvstore(:,:,:,ispec) * factor_common_use(:,:,:) * &
+ (betaval(i_SLS) * epsilondev_yz(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(5,:,:,:))
+
enddo
#endif
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_acoustic.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_acoustic.F90 (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_acoustic.F90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -0,0 +1,368 @@
+!=====================================================================
+!
+! S p e c f e m 3 D G l o b e V e r s i o n 5 . 1
+! --------------------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA
+! and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+ subroutine compute_forces_acoustic()
+
+ use specfem_par
+ use specfem_par_crustmantle,only: displ_crust_mantle,b_displ_crust_mantle, &
+ ibool_crust_mantle,ibelm_bottom_crust_mantle
+ use specfem_par_innercore,only: displ_inner_core,b_displ_inner_core, &
+ ibool_inner_core,ibelm_top_inner_core
+ use specfem_par_outercore
+ implicit none
+
+ ! local parameters
+ real(kind=CUSTOM_REAL) :: time,b_time
+ ! non blocking MPI
+ ! iphase: iphase = 1 is for computing outer elements in the outer_core,
+ ! iphase = 2 is for computing inner elements in the outer core (former icall parameter)
+ integer :: iphase
+ logical :: phase_is_inner
+
+ ! compute internal forces in the fluid region
+ if(CUSTOM_REAL == SIZE_REAL) then
+ time = sngl((dble(it-1)*DT-t0)*scale_t_inv)
+ else
+ time = (dble(it-1)*DT-t0)*scale_t_inv
+ endif
+ if (SIMULATION_TYPE == 3) then
+ ! note on backward/reconstructed wavefields:
+ ! b_time for b_displ( it=1 ) corresponds to (NSTEP - 1)*DT - t0 (after Newmark scheme...)
+ ! as we start with saved wavefields b_displ( 1 ) <-> displ( NSTEP ) which correspond
+ ! to a time (NSTEP - (it-1) - 1)*DT - t0
+ ! for reconstructing the rotational contributions
+ if(CUSTOM_REAL == SIZE_REAL) then
+ b_time = sngl((dble(NSTEP-it)*DT-t0)*scale_t_inv)
+ else
+ b_time = (dble(NSTEP-it)*DT-t0)*scale_t_inv
+ endif
+ endif
+
+ ! ****************************************************
+ ! big loop over all spectral elements in the fluid
+ ! ****************************************************
+
+ ! distinguishes two runs: for elements on MPI interfaces, and elements within the partitions
+ do iphase=1,2
+
+ ! first, iphase == 1 for points on MPI interfaces (thus outer elements)
+ ! second, iphase == 2 for points purely inside partition (thus inner elements)
+ !
+ ! compute all the outer elements first, then sends out non blocking MPI communication
+ ! and continues computing inner elements (overlapping)
+ if( iphase == 1 ) then
+ phase_is_inner = .false.
+ else
+ phase_is_inner = .true.
+ endif
+
+ if( .not. GPU_MODE ) then
+ ! on CPU
+ if( USE_DEVILLE_PRODUCTS_VAL ) then
+ ! uses Deville et al. (2002) routine
+ call compute_forces_outer_core_Dev(time,deltat,two_omega_earth, &
+ A_array_rotation,B_array_rotation, &
+ displ_outer_core,accel_outer_core,div_displ_outer_core, &
+ phase_is_inner)
+ else
+ ! div_displ_outer_core is initialized to zero in the following subroutine.
+ call compute_forces_outer_core(time,deltat,two_omega_earth, &
+ A_array_rotation,B_array_rotation, &
+ displ_outer_core,accel_outer_core,div_displ_outer_core, &
+ phase_is_inner)
+ endif
+
+ ! adjoint / kernel runs
+ if (SIMULATION_TYPE == 3) then
+ if( USE_DEVILLE_PRODUCTS_VAL ) then
+ ! uses Deville et al. (2002) routine
+ call compute_forces_outer_core_Dev(b_time,b_deltat,b_two_omega_earth, &
+ b_A_array_rotation,b_B_array_rotation, &
+ b_displ_outer_core,b_accel_outer_core,b_div_displ_outer_core, &
+ phase_is_inner)
+ else
+ call compute_forces_outer_core(b_time,b_deltat,b_two_omega_earth, &
+ b_A_array_rotation,b_B_array_rotation, &
+ b_displ_outer_core,b_accel_outer_core,b_div_displ_outer_core, &
+ phase_is_inner)
+ endif
+ endif
+
+ else
+ ! on GPU
+ call load_GPU_acoustic()
+
+ ! includes both forward and adjoint/kernel simulations
+ call compute_forces_outer_core_cuda(Mesh_pointer,iphase,time,b_time)
+
+ call load_CPU_acoustic()
+ endif
+
+
+ ! computes additional contributions to acceleration field
+ if( iphase == 1 ) then
+ ! Stacey absorbing boundaries
+ if(NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS) call compute_stacey_outer_core()
+
+ ! ****************************************************
+ ! ********** add matching with solid part **********
+ ! ****************************************************
+ ! only for elements in first matching layer in the fluid
+ !---
+ !--- couple with mantle at the top of the outer core
+ !---
+ if(ACTUALLY_COUPLE_FLUID_CMB) &
+ call compute_coupling_fluid_CMB(displ_crust_mantle,b_displ_crust_mantle, &
+ ibool_crust_mantle,ibelm_bottom_crust_mantle, &
+ accel_outer_core,b_accel_outer_core, &
+ normal_top_outer_core,jacobian2D_top_outer_core, &
+ wgllwgll_xy,ibool_outer_core,ibelm_top_outer_core, &
+ SIMULATION_TYPE,NSPEC2D_TOP(IREGION_OUTER_CORE))
+
+ !---
+ !--- couple with inner core at the bottom of the outer core
+ !---
+ if(ACTUALLY_COUPLE_FLUID_ICB) &
+ call compute_coupling_fluid_ICB(displ_inner_core,b_displ_inner_core, &
+ ibool_inner_core,ibelm_top_inner_core, &
+ accel_outer_core,b_accel_outer_core, &
+ normal_bottom_outer_core,jacobian2D_bottom_outer_core, &
+ wgllwgll_xy,ibool_outer_core,ibelm_bottom_outer_core, &
+ SIMULATION_TYPE,NSPEC2D_BOTTOM(IREGION_OUTER_CORE))
+ endif
+
+
+ ! assemble all the contributions between slices using MPI
+ ! in outer core
+ if( iphase == 1 ) then
+ ! sends out MPI interface data (non-blocking)
+
+ if(.NOT. GPU_MODE) then
+ ! on CPU
+ call assemble_MPI_scalar_ext_mesh_s(NPROCTOT_VAL,NGLOB_OUTER_CORE, &
+ accel_outer_core, &
+ buffer_send_scalar_outer_core,buffer_recv_scalar_outer_core, &
+ num_interfaces_outer_core,max_nibool_interfaces_outer_core, &
+ nibool_interfaces_outer_core,ibool_interfaces_outer_core,&
+ my_neighbours_outer_core, &
+ request_send_scalar_outer_core,request_recv_scalar_outer_core)
+ else
+ ! on GPU
+ call load_GPU_acoustic()
+ ! outer core
+ call assemble_MPI_scalar_send_cuda(NPROCTOT_VAL, &
+ buffer_send_scalar_outer_core,buffer_recv_scalar_outer_core, &
+ num_interfaces_outer_core,max_nibool_interfaces_outer_core, &
+ nibool_interfaces_outer_core,&
+ my_neighbours_outer_core, &
+ request_send_scalar_outer_core,request_recv_scalar_outer_core, &
+ 1) ! <-- 1 == fwd accel
+ endif
+
+ ! adjoint simulations
+ if( SIMULATION_TYPE == 3 ) then
+ if(.NOT. GPU_MODE) then
+ ! on CPU
+ call assemble_MPI_scalar_ext_mesh_s(NPROCTOT_VAL,NGLOB_OUTER_CORE, &
+ b_accel_outer_core, &
+ b_buffer_send_scalar_outer_core,b_buffer_recv_scalar_outer_core, &
+ num_interfaces_outer_core,max_nibool_interfaces_outer_core, &
+ nibool_interfaces_outer_core,ibool_interfaces_outer_core,&
+ my_neighbours_outer_core, &
+ b_request_send_scalar_outer_core,b_request_recv_scalar_outer_core)
+ else
+ ! on GPU
+ call load_GPU_acoustic()
+ ! outer core
+ call assemble_MPI_scalar_send_cuda(NPROCTOT_VAL, &
+ b_buffer_send_scalar_outer_core,b_buffer_recv_scalar_outer_core, &
+ num_interfaces_outer_core,max_nibool_interfaces_outer_core, &
+ nibool_interfaces_outer_core,&
+ my_neighbours_outer_core, &
+ b_request_send_scalar_outer_core,b_request_recv_scalar_outer_core, &
+ 3) ! <-- 3 == adjoint b_accel
+ endif ! GPU
+ endif ! SIMULATION_TYPE == 3
+
+ else
+ ! make sure the last communications are finished and processed
+ ! waits for send/receive requests to be completed and assembles values
+ if(.NOT. GPU_MODE) then
+ ! on CPU
+ call assemble_MPI_scalar_ext_mesh_w(NPROCTOT_VAL,NGLOB_OUTER_CORE, &
+ accel_outer_core, &
+ buffer_recv_scalar_outer_core,num_interfaces_outer_core,&
+ max_nibool_interfaces_outer_core, &
+ nibool_interfaces_outer_core,ibool_interfaces_outer_core, &
+ request_send_scalar_outer_core,request_recv_scalar_outer_core)
+ else
+ ! on GPU
+ call assemble_MPI_scalar_write_cuda(NPROCTOT_VAL, &
+ buffer_recv_scalar_outer_core, &
+ num_interfaces_outer_core,max_nibool_interfaces_outer_core, &
+ request_send_scalar_outer_core,request_recv_scalar_outer_core, &
+ 1) ! <-- 1 == fwd accel
+ call load_CPU_acoustic()
+ endif
+
+ ! adjoint simulations
+ if( SIMULATION_TYPE == 3 ) then
+ if(.NOT. GPU_MODE) then
+ ! on CPU
+ call assemble_MPI_scalar_ext_mesh_w(NPROCTOT_VAL,NGLOB_OUTER_CORE, &
+ b_accel_outer_core, &
+ b_buffer_recv_scalar_outer_core,num_interfaces_outer_core,&
+ max_nibool_interfaces_outer_core, &
+ nibool_interfaces_outer_core,ibool_interfaces_outer_core, &
+ b_request_send_scalar_outer_core,b_request_recv_scalar_outer_core)
+ else
+ ! on GPU
+ call assemble_MPI_scalar_write_cuda(NPROCTOT_VAL, &
+ b_buffer_recv_scalar_outer_core, &
+ num_interfaces_outer_core,max_nibool_interfaces_outer_core, &
+ b_request_send_scalar_outer_core,b_request_recv_scalar_outer_core, &
+ 3) ! <-- 3 == adjoint b_accel
+ call load_CPU_acoustic()
+ endif
+ endif ! SIMULATION_TYPE == 3
+ endif ! iphase == 1
+
+ enddo ! iphase
+
+ ! Newmark time scheme:
+ ! corrector terms for fluid parts
+ ! (multiply by the inverse of the mass matrix and update velocity)
+ if(.NOT. GPU_MODE) then
+ ! on CPU
+ call compute_forces_ac_update_veloc(veloc_outer_core,accel_outer_core, &
+ deltatover2,rmass_outer_core)
+ ! adjoint / kernel runs
+ if (SIMULATION_TYPE == 3) &
+ call compute_forces_ac_update_veloc(b_veloc_outer_core,b_accel_outer_core, &
+ b_deltatover2,rmass_outer_core)
+ else
+ ! on GPU
+ call load_GPU_acoustic()
+ call kernel_3_outer_core_cuda(Mesh_pointer, &
+ deltatover2,SIMULATION_TYPE,b_deltatover2)
+ call load_CPU_acoustic()
+ endif
+
+ end subroutine compute_forces_acoustic
+
+!=====================================================================
+
+ subroutine compute_forces_ac_update_veloc(veloc_outer_core,accel_outer_core,deltatover2,rmass_outer_core)
+
+ use specfem_par,only: CUSTOM_REAL,NGLOB_OUTER_CORE
+
+#ifdef _HANDOPT
+ use specfem_par,only: imodulo_NGLOB_OUTER_CORE
+#endif
+
+ implicit none
+
+ ! velocity potential
+ real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE) :: veloc_outer_core,accel_outer_core
+
+ ! mass matrix
+ real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE) :: rmass_outer_core
+
+ real(kind=CUSTOM_REAL) :: deltatover2
+
+ ! local parameters
+ integer :: i
+
+ ! Newmark time scheme
+ ! multiply by the inverse of the mass matrix and update velocity
+
+#ifdef _HANDOPT_NEWMARK
+! way 2:
+ ! outer core
+ if(imodulo_NGLOB_OUTER_CORE >= 1) then
+ do i=1,imodulo_NGLOB_OUTER_CORE
+ accel_outer_core(i) = accel_outer_core(i)*rmass_outer_core(i)
+ veloc_outer_core(i) = veloc_outer_core(i) + deltatover2*accel_outer_core(i)
+ enddo
+ endif
+ do i=imodulo_NGLOB_OUTER_CORE+1,NGLOB_OUTER_CORE,3
+ accel_outer_core(i) = accel_outer_core(i)*rmass_outer_core(i)
+ veloc_outer_core(i) = veloc_outer_core(i) + deltatover2*accel_outer_core(i)
+
+ accel_outer_core(i+1) = accel_outer_core(i+1)*rmass_outer_core(i+1)
+ veloc_outer_core(i+1) = veloc_outer_core(i+1) + deltatover2*accel_outer_core(i+1)
+
+ accel_outer_core(i+2) = accel_outer_core(i+2)*rmass_outer_core(i+2)
+ veloc_outer_core(i+2) = veloc_outer_core(i+2) + deltatover2*accel_outer_core(i+2)
+ enddo
+#else
+! way 1:
+ do i=1,NGLOB_OUTER_CORE
+ accel_outer_core(i) = accel_outer_core(i)*rmass_outer_core(i)
+ veloc_outer_core(i) = veloc_outer_core(i) + deltatover2*accel_outer_core(i)
+ enddo
+#endif
+
+ end subroutine compute_forces_ac_update_veloc
+
+!=====================================================================
+
+ subroutine load_GPU_acoustic
+
+ use specfem_par
+ use specfem_par_outercore
+ implicit none
+
+ ! daniel: TODO - temporary transfers to the GPU
+ call transfer_fields_oc_to_device(NGLOB_OUTER_CORE,displ_outer_core, &
+ veloc_outer_core,accel_outer_core,Mesh_pointer)
+
+ if( SIMULATION_TYPE == 3 ) then
+ call transfer_b_fields_oc_to_device(NGLOB_OUTER_CORE,b_displ_outer_core, &
+ b_veloc_outer_core,b_accel_outer_core,Mesh_pointer)
+ endif
+
+ end subroutine
+
+!=====================================================================
+
+ subroutine load_CPU_acoustic
+
+ use specfem_par
+ use specfem_par_outercore
+ implicit none
+
+ ! daniel: TODO - temporary transfers to the GPU
+ call transfer_fields_oc_from_device(NGLOB_OUTER_CORE,displ_outer_core, &
+ veloc_outer_core,accel_outer_core,Mesh_pointer)
+
+ if( SIMULATION_TYPE == 3 ) then
+ call transfer_b_fields_oc_from_device(NGLOB_OUTER_CORE,b_displ_outer_core, &
+ b_veloc_outer_core,b_accel_outer_core,Mesh_pointer)
+ endif
+
+ end subroutine
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -25,133 +25,107 @@
!
!=====================================================================
- subroutine compute_forces_crust_mantle(minus_gravity_table,density_table,minus_deriv_gravity_table, &
- displ_crust_mantle,accel_crust_mantle,xstore,ystore,zstore, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-!----------------------
- is_on_a_slice_edge_crust_mantle,icall, &
- accel_inner_core,ibool_inner_core,idoubling_inner_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector,iphase, &
- nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
- npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
- receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_INNER_CORE,INCLUDE_CENTRAL_CUBE,iphase_CC, &
-!----------------------
- hprime_xx,hprime_yy,hprime_zz, &
- hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- kappavstore,kappahstore,muvstore,muhstore,eta_anisostore, &
- c11store,c12store,c13store,c14store,c15store,c16store,c22store, &
- c23store,c24store,c25store,c26store,c33store,c34store,c35store, &
- c36store,c44store,c45store,c46store,c55store,c56store,c66store, &
- ibool,ispec_is_tiso, &
- !-- idoubling,
- R_memory,epsilondev,epsilon_trace_over_3,one_minus_sum_beta, &
- alphaval,betaval,gammaval,factor_common,vx,vy,vz,vnspec)
+ subroutine compute_forces_crust_mantle(displ_crust_mantle,accel_crust_mantle, &
+ phase_is_inner, &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+ epsilondev_xz,epsilondev_yz, &
+ epsilon_trace_over_3, &
+ alphaval,betaval,gammaval, &
+ factor_common,vx,vy,vz,vnspec)
- implicit none
+ use constants
- include "constants.h"
+ use specfem_par,only: &
+ hprime_xx,hprime_yy,hprime_zz,hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
+ minus_gravity_table,density_table,minus_deriv_gravity_table, &
+ myrank,iproc_xi,iproc_eta,ichunk,addressing, &
+ iprocfrom_faces,iprocto_faces, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ npoin2D_max_all_CM_IC,INCLUDE_CENTRAL_CUBE
-! include values created by the mesher
-! done for performance only using static allocation to allow for loop unrolling
- include "OUTPUT_FILES/values_from_mesher.h"
+ use specfem_par_crustmantle,only: &
+ xstore => xstore_crust_mantle,ystore => ystore_crust_mantle,zstore => zstore_crust_mantle, &
+ xix => xix_crust_mantle,xiy => xiy_crust_mantle,xiz => xiz_crust_mantle, &
+ etax => etax_crust_mantle,etay => etay_crust_mantle,etaz => etaz_crust_mantle, &
+ gammax => gammax_crust_mantle,gammay => gammay_crust_mantle,gammaz => gammaz_crust_mantle, &
+ kappavstore => kappavstore_crust_mantle,kappahstore => kappahstore_crust_mantle, &
+ muvstore => muvstore_crust_mantle,muhstore => muhstore_crust_mantle, &
+ eta_anisostore => eta_anisostore_crust_mantle, &
+ c11store => c11store_crust_mantle,c12store => c12store_crust_mantle,c13store => c13store_crust_mantle, &
+ c14store => c14store_crust_mantle,c15store => c15store_crust_mantle,c16store => c16store_crust_mantle, &
+ c22store => c22store_crust_mantle,c23store => c23store_crust_mantle,c24store => c24store_crust_mantle, &
+ c25store => c25store_crust_mantle,c26store => c26store_crust_mantle,c33store => c33store_crust_mantle, &
+ c34store => c34store_crust_mantle,c35store => c35store_crust_mantle,c36store => c36store_crust_mantle, &
+ c44store => c44store_crust_mantle,c45store => c45store_crust_mantle,c46store => c46store_crust_mantle, &
+ c55store => c55store_crust_mantle,c56store => c56store_crust_mantle,c66store => c66store_crust_mantle, &
+ ibool => ibool_crust_mantle, &
+ ispec_is_tiso => ispec_is_tiso_crust_mantle, &
+ one_minus_sum_beta => one_minus_sum_beta_crust_mantle, &
+ iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+ npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+ phase_ispec_inner => phase_ispec_inner_crust_mantle, &
+ nspec_outer => nspec_outer_crust_mantle, &
+ nspec_inner => nspec_inner_crust_mantle
-! model_attenuation_variables
-! type model_attenuation_variables
-! sequence
-! double precision min_period, max_period
-! double precision :: QT_c_source ! Source Frequency
-! double precision, dimension(:), pointer :: Qtau_s ! tau_sigma
-! double precision, dimension(:), pointer :: QrDisc ! Discontinutitues Defined
-! double precision, dimension(:), pointer :: Qr ! Radius
-! double precision, dimension(:), pointer :: Qmu ! Shear Attenuation
-! double precision, dimension(:,:), pointer :: Qtau_e ! tau_epsilon
-! double precision, dimension(:), pointer :: Qomsb, Qomsb2 ! one_minus_sum_beta
-! double precision, dimension(:,:), pointer :: Qfc, Qfc2 ! factor_common
-! double precision, dimension(:), pointer :: Qsf, Qsf2 ! scale_factor
-! integer, dimension(:), pointer :: Qrmin ! Max and Mins of idoubling
-! integer, dimension(:), pointer :: Qrmax ! Max and Mins of idoubling
-! integer, dimension(:), pointer :: interval_Q ! Steps
-! integer :: Qn ! Number of points
-! integer dummy_pad ! padding 4 bytes to align the structure
-! end type model_attenuation_variables
+ use specfem_par_innercore,only: &
+ ibool_inner_core,idoubling_inner_core, &
+ iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+ npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+ iboolfaces_inner_core,iboolcorner_inner_core, &
+ nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+ npoin2D_cube_from_slices, &
+ ibool_central_cube, &
+ receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC
-! array with the local to global mapping per slice
-! integer, dimension(NSPEC_CRUST_MANTLE) :: idoubling
- logical, dimension(NSPEC_CRUST_MANTLE) :: ispec_is_tiso
+ implicit none
-! displacement and acceleration
+ ! displacement and acceleration
real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE) :: displ_crust_mantle,accel_crust_mantle
-! memory variables for attenuation
-! memory variables R_ij are stored at the local rather than global level
-! to allow for optimization of cache access by compiler
- integer i_SLS,i_memory
-! variable sized array variables for one_minus_sum_beta and factor_common
- integer vx, vy, vz, vnspec
- real(kind=CUSTOM_REAL) one_minus_sum_beta_use,minus_sum_beta
- real(kind=CUSTOM_REAL), dimension(vx, vy, vz, vnspec) :: one_minus_sum_beta
+ ! memory variables for attenuation
+ ! memory variables R_ij are stored at the local rather than global level
+ ! to allow for optimization of cache access by compiler
+! real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUAT) :: R_memory
+ real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUAT) :: &
+ R_xx,R_yy,R_xy,R_xz,R_yz
-! for attenuation
- real(kind=CUSTOM_REAL) R_xx_val,R_yy_val
- real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUAT) :: R_memory
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: epsilondev
-! [alpha,beta,gamma]val reduced to N_SLS and factor_common to N_SLS*NUM_NODES
- real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval,betaval,gammaval
- real(kind=CUSTOM_REAL), dimension(N_SLS, vx, vy, vz, vnspec) :: factor_common
- real(kind=CUSTOM_REAL), dimension(NGLLX, NGLLY, NGLLZ) :: factor_common_c44_muv
-
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
- real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: epsilon_trace_over_3
-
-! arrays with mesh parameters per slice
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: ibool
+! real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: epsilondev
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
+ epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz
-! array with derivatives of Lagrange polynomials and precalculated products
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprimewgll_xx
- real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLY) :: hprime_yy,hprimewgll_yy
- real(kind=CUSTOM_REAL), dimension(NGLLZ,NGLLZ) :: hprime_zz,hprimewgll_zz
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
- real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
- tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
+ real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: epsilon_trace_over_3
-! x y and z contain r theta and phi
- real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE) :: xstore,ystore,zstore
+ ! variable sized array variables for one_minus_sum_beta and factor_common
+ integer vx, vy, vz, vnspec
+ ! [alpha,beta,gamma]val reduced to N_SLS and factor_common to N_SLS*NUM_NODES
+ real(kind=CUSTOM_REAL), dimension(N_SLS, vx, vy, vz, vnspec) :: factor_common
+ real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval,betaval,gammaval
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ISO_MANTLE) :: &
- kappavstore,muvstore
+ ! inner/outer element run flag
+ logical :: phase_is_inner
-! store anisotropic properties only where needed to save memory
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_TISO_MANTLE) :: &
- kappahstore,muhstore,eta_anisostore
+ ! local parameters
-! arrays for full anisotropy only when needed
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ANISO_MANTLE) :: &
- c11store,c12store,c13store,c14store,c15store,c16store,c22store, &
- c23store,c24store,c25store,c26store,c33store,c34store,c35store, &
- c36store,c44store,c45store,c46store,c55store,c56store,c66store
+ ! for attenuation
+ real(kind=CUSTOM_REAL) one_minus_sum_beta_use,minus_sum_beta
+ real(kind=CUSTOM_REAL) R_xx_val,R_yy_val
+ real(kind=CUSTOM_REAL), dimension(NGLLX, NGLLY, NGLLZ) :: factor_common_c44_muv
+ real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
+ tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
+
integer ispec,iglob,ispec_strain
integer i,j,k,l
+ integer i_SLS
-! the 21 coefficients for an anisotropic medium in reduced notation
+ ! the 21 coefficients for an anisotropic medium in reduced notation
real(kind=CUSTOM_REAL) c11,c22,c33,c44,c55,c66,c12,c13,c23,c14,c24,c34,c15,c25,c35,c45,c16,c26,c36,c46,c56
real(kind=CUSTOM_REAL) rhovphsq,sinphifour,cosphisq,sinphisq,costhetasq,rhovsvsq,sinthetasq, &
@@ -159,8 +133,8 @@
costwotheta,cosfourtheta,sintwophisq,costheta,sinphi,sintheta,cosphi, &
sintwotheta,costwophi,sintwophi,costwothetasq,costwophisq,phi,theta
- real(kind=CUSTOM_REAL) two_rhovpvsq,two_rhovphsq,two_rhovsvsq,two_rhovshsq
- real(kind=CUSTOM_REAL) four_rhovpvsq,four_rhovphsq,four_rhovsvsq,four_rhovshsq
+ real(kind=CUSTOM_REAL) two_rhovsvsq,two_rhovshsq ! two_rhovpvsq,two_rhovphsq
+ real(kind=CUSTOM_REAL) four_rhovsvsq,four_rhovshsq ! four_rhovpvsq,four_rhovphsq,
real(kind=CUSTOM_REAL) twoetaminone,etaminone,eta_aniso
real(kind=CUSTOM_REAL) two_eta_aniso,four_eta_aniso,six_eta_aniso
@@ -183,7 +157,7 @@
real(kind=CUSTOM_REAL) tempy1l,tempy2l,tempy3l
real(kind=CUSTOM_REAL) tempz1l,tempz2l,tempz3l
-! for gravity
+ ! for gravity
integer int_radius
real(kind=CUSTOM_REAL) sigma_yx,sigma_zx,sigma_zy
double precision radius,rho,minus_g,minus_dg
@@ -192,113 +166,31 @@
double precision cos_theta_sq,sin_theta_sq,cos_phi_sq,sin_phi_sq
double precision factor,sx_l,sy_l,sz_l,gxl,gyl,gzl
double precision Hxxl,Hyyl,Hzzl,Hxyl,Hxzl,Hyzl
- double precision, dimension(NRAD_GRAVITY) :: minus_gravity_table,density_table,minus_deriv_gravity_table
real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: rho_s_H
- double precision, dimension(NGLLX,NGLLY,NGLLZ) :: wgll_cube
-! this for non blocking MPI
- integer :: iphase,icall
+! integer :: computed_elements
+ integer :: num_elements,ispec_p
+ integer :: iphase
- integer :: computed_elements
-
- logical, dimension(NSPEC_CRUST_MANTLE) :: is_on_a_slice_edge_crust_mantle
-
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE) :: accel_inner_core
-
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: ibool_inner_core
-
- integer, dimension(NSPEC_INNER_CORE) :: idoubling_inner_core
-
- integer :: ichunk,iproc_xi,iproc_eta,myrank
-
- integer, dimension(NCHUNKS_VAL,0:NPROC_XI_VAL-1,0:NPROC_ETA_VAL-1) :: addressing
-
- integer, dimension(NGLOB2DMAX_XMIN_XMAX_CM) :: iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle
- integer, dimension(NGLOB2DMAX_YMIN_YMAX_CM) :: iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle
-
- integer, dimension(NGLOB2DMAX_XMIN_XMAX_IC) :: iboolleft_xi_inner_core,iboolright_xi_inner_core
- integer, dimension(NGLOB2DMAX_YMIN_YMAX_IC) :: iboolleft_eta_inner_core,iboolright_eta_inner_core
-
- integer npoin2D_faces_crust_mantle(NUMFACES_SHARED)
- integer npoin2D_faces_inner_core(NUMFACES_SHARED)
-
- integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- npoin2D_xi_inner_core,npoin2D_eta_inner_core
-
-! communication pattern for faces between chunks
- integer, dimension(NUMMSGS_FACES_VAL) :: iprocfrom_faces,iprocto_faces
-
-! communication pattern for corners between chunks
- integer, dimension(NCORNERSCHUNKS_VAL) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
-
- integer, dimension(NGLOB1D_RADIAL_CM,NUMCORNERS_SHARED) :: iboolcorner_crust_mantle
- integer, dimension(NGLOB1D_RADIAL_IC,NUMCORNERS_SHARED) :: iboolcorner_inner_core
-
- integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED) :: iboolfaces_crust_mantle
- integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED) :: iboolfaces_inner_core
-
- integer :: npoin2D_max_all_CM_IC
- real(kind=CUSTOM_REAL), dimension(NDIM,npoin2D_max_all_CM_IC) :: buffer_send_faces,buffer_received_faces
-
-! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB1D_RADIAL_CM + NGLOB1D_RADIAL_IC) :: &
- buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector
-
-! for matching with central cube in inner core
- integer nb_msgs_theor_in_cube, npoin2D_cube_from_slices,iphase_CC
- integer, dimension(nb_msgs_theor_in_cube) :: sender_from_slices_to_cube
- double precision, dimension(npoin2D_cube_from_slices,NDIM) :: buffer_slices
- double precision, dimension(npoin2D_cube_from_slices,NDIM,nb_msgs_theor_in_cube) :: buffer_all_cube_from_slices
- integer, dimension(nb_msgs_theor_in_cube,npoin2D_cube_from_slices):: ibool_central_cube
- integer receiver_cube_from_slices
- logical :: INCLUDE_CENTRAL_CUBE
-
-! local to global mapping
- integer NSPEC2D_BOTTOM_INNER_CORE
- integer, dimension(NSPEC2D_BOTTOM_INNER_CORE) :: ibelm_bottom_inner_core
-
! ****************************************************
! big loop over all spectral elements in the solid
! ****************************************************
- computed_elements = 0
+! computed_elements = 0
+ if( .not. phase_is_inner ) then
+ iphase = 1
+ num_elements = nspec_outer
+ else
+ iphase = 2
+ num_elements = nspec_inner
+ endif
- do ispec = 1,NSPEC_CRUST_MANTLE
+ do ispec_p = 1,num_elements
-! hide communications by computing the edges first
- if((icall == 2 .and. is_on_a_slice_edge_crust_mantle(ispec)) .or. &
- (icall == 1 .and. .not. is_on_a_slice_edge_crust_mantle(ispec))) cycle
+ ispec = phase_ispec_inner(ispec_p,iphase)
-! process the communications every ELEMENTS_NONBLOCKING elements
- computed_elements = computed_elements + 1
- if (USE_NONBLOCKING_COMMS .and. icall == 2 .and. mod(computed_elements,ELEMENTS_NONBLOCKING_CM_IC) == 0) then
+ ! only compute element which belong to current phase (inner or outer elements)
- if(iphase <= 7) call assemble_MPI_vector(myrank,accel_crust_mantle,accel_inner_core, &
- iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector, &
- NUMMSGS_FACES_VAL,NCORNERSCHUNKS_VAL, &
- NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL_CM, &
- NGLOB1D_RADIAL_IC,NCHUNKS_VAL,iphase)
-
- if(INCLUDE_CENTRAL_CUBE) then
- if(iphase > 7 .and. iphase_CC <= 4) &
- call assemble_MPI_central_cube(ichunk,nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
- npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
- receiver_cube_from_slices,ibool_inner_core,idoubling_inner_core, &
- ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,accel_inner_core,NDIM,iphase_CC)
- endif
-
- endif
-
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
@@ -549,13 +441,13 @@
four_eta_aniso = 4.*eta_aniso
six_eta_aniso = 6.*eta_aniso
- two_rhovpvsq = 2.*rhovpvsq
- two_rhovphsq = 2.*rhovphsq
+ !two_rhovpvsq = 2.*rhovpvsq
+ !two_rhovphsq = 2.*rhovphsq
two_rhovsvsq = 2.*rhovsvsq
two_rhovshsq = 2.*rhovshsq
- four_rhovpvsq = 4.*rhovpvsq
- four_rhovphsq = 4.*rhovphsq
+ !four_rhovpvsq = 4.*rhovpvsq
+ !four_rhovphsq = 4.*rhovphsq
four_rhovsvsq = 4.*rhovsvsq
four_rhovshsq = 4.*rhovshsq
@@ -693,14 +585,14 @@
! subtract memory variables if attenuation
if(ATTENUATION_VAL .and. ( USE_ATTENUATION_MIMIC .eqv. .false. ) ) then
do i_SLS = 1,N_SLS
- R_xx_val = R_memory(1,i_SLS,i,j,k,ispec)
- R_yy_val = R_memory(2,i_SLS,i,j,k,ispec)
+ R_xx_val = R_xx(i_SLS,i,j,k,ispec)
+ R_yy_val = R_yy(i_SLS,i,j,k,ispec)
sigma_xx = sigma_xx - R_xx_val
sigma_yy = sigma_yy - R_yy_val
sigma_zz = sigma_zz + R_xx_val + R_yy_val
- sigma_xy = sigma_xy - R_memory(3,i_SLS,i,j,k,ispec)
- sigma_xz = sigma_xz - R_memory(4,i_SLS,i,j,k,ispec)
- sigma_yz = sigma_yz - R_memory(5,i_SLS,i,j,k,ispec)
+ sigma_xy = sigma_xy - R_xy(i_SLS,i,j,k,ispec)
+ sigma_xz = sigma_xz - R_xz(i_SLS,i,j,k,ispec)
+ sigma_yz = sigma_yz - R_yz(i_SLS,i,j,k,ispec)
enddo
endif
@@ -890,9 +782,7 @@
do j=1,NGLLY
do i=1,NGLLX
iglob = ibool(i,j,k,ispec)
- accel_crust_mantle(1,iglob) = accel_crust_mantle(1,iglob) + sum_terms(1,i,j,k)
- accel_crust_mantle(2,iglob) = accel_crust_mantle(2,iglob) + sum_terms(2,i,j,k)
- accel_crust_mantle(3,iglob) = accel_crust_mantle(3,iglob) + sum_terms(3,i,j,k)
+ accel_crust_mantle(:,iglob) = accel_crust_mantle(:,iglob) + sum_terms(:,i,j,k)
enddo
enddo
enddo
@@ -916,26 +806,42 @@
! use Runge-Kutta scheme to march in time
do i_SLS = 1,N_SLS
- do i_memory = 1,5
! get coefficients for that standard linear solid
! IMPROVE we use mu_v here even if there is some anisotropy
! IMPROVE we should probably use an average value instead
- ! reformatted R_memory to handle large factor_common and reduced [alpha,beta,gamma]val
- factor_common_c44_muv = factor_common(i_SLS,:,:,:,ispec)
- if(ANISOTROPIC_3D_MANTLE_VAL) then
- factor_common_c44_muv = factor_common_c44_muv * c44store(:,:,:,ispec)
- else
- factor_common_c44_muv = factor_common_c44_muv * muvstore(:,:,:,ispec)
- endif
+ ! reformatted R_memory to handle large factor_common and reduced [alpha,beta,gamma]val
+ factor_common_c44_muv = factor_common(i_SLS,:,:,:,ispec)
+ if(ANISOTROPIC_3D_MANTLE_VAL) then
+ factor_common_c44_muv = factor_common_c44_muv * c44store(:,:,:,ispec)
+ else
+ factor_common_c44_muv = factor_common_c44_muv * muvstore(:,:,:,ispec)
+ endif
- R_memory(i_memory,i_SLS,:,:,:,ispec) = alphaval(i_SLS) * &
- R_memory(i_memory,i_SLS,:,:,:,ispec) + &
- factor_common_c44_muv * &
- (betaval(i_SLS) * epsilondev(i_memory,:,:,:,ispec) + &
- gammaval(i_SLS) * epsilondev_loc(i_memory,:,:,:))
- enddo
+! do i_memory = 1,5
+! R_memory(i_memory,i_SLS,:,:,:,ispec) = alphaval(i_SLS) * &
+! R_memory(i_memory,i_SLS,:,:,:,ispec) + &
+! factor_common_c44_muv * &
+! (betaval(i_SLS) * epsilondev(i_memory,:,:,:,ispec) + &
+! gammaval(i_SLS) * epsilondev_loc(i_memory,:,:,:))
+! enddo
+
+ R_xx(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_xx(i_SLS,:,:,:,ispec) + factor_common_c44_muv * &
+ (betaval(i_SLS) * epsilondev_xx(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(1,:,:,:))
+
+ R_yy(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_yy(i_SLS,:,:,:,ispec) + factor_common_c44_muv * &
+ (betaval(i_SLS) * epsilondev_yy(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(2,:,:,:))
+
+ R_xy(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_xy(i_SLS,:,:,:,ispec) + factor_common_c44_muv * &
+ (betaval(i_SLS) * epsilondev_xy(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(3,:,:,:))
+
+ R_xz(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_xz(i_SLS,:,:,:,ispec) + factor_common_c44_muv * &
+ (betaval(i_SLS) * epsilondev_xz(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(4,:,:,:))
+
+ R_yz(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_yz(i_SLS,:,:,:,ispec) + factor_common_c44_muv * &
+ (betaval(i_SLS) * epsilondev_yz(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(5,:,:,:))
+
enddo
endif
@@ -946,7 +852,11 @@
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
- epsilondev(:,i,j,k,ispec) = epsilondev_loc(:,i,j,k)
+ epsilondev_xx(i,j,k,ispec) = epsilondev_loc(1,i,j,k)
+ epsilondev_yy(i,j,k,ispec) = epsilondev_loc(2,i,j,k)
+ epsilondev_xy(i,j,k,ispec) = epsilondev_loc(3,i,j,k)
+ epsilondev_xz(i,j,k,ispec) = epsilondev_loc(4,i,j,k)
+ epsilondev_yz(i,j,k,ispec) = epsilondev_loc(5,i,j,k)
enddo
enddo
enddo
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle_Dev.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle_Dev.F90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle_Dev.F90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -30,112 +30,96 @@
! or compile with: -D_HANDOPT
!#define _HANDOPT
-! BEWARE:
-! BEWARE: we have observed that using _HANDOPT in combination with -O3 or higher can lead to problems on some machines;
-! BEWARE: thus, be careful when using it. At the very least, run the same test simulation once with _HANDOPT and once without
-! BEWARE: and make sure that all the seismograms you get are the same down to roundoff noise.
-! BEWARE:
-
! note: these hand optimizations should help compilers to pipeline the code and make better use of the cache;
! depending on compilers, it can further decrease the computation time by ~ 30%.
! the original routines are commented with "! way 1", the hand-optimized routines with "! way 2"
- subroutine compute_forces_crust_mantle_Dev(minus_gravity_table,density_table,minus_deriv_gravity_table, &
- displ_crust_mantle,accel_crust_mantle,xstore,ystore,zstore, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-!----------------------
- is_on_a_slice_edge_crust_mantle,icall, &
- accel_inner_core,ibool_inner_core,idoubling_inner_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector,iphase, &
- nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
- npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
- receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_INNER_CORE,INCLUDE_CENTRAL_CUBE,iphase_CC, &
-!----------------------
- hprime_xx,hprime_xxT, &
- hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- kappavstore,kappahstore,muvstore,muhstore,eta_anisostore, &
- c11store,c12store,c13store,c14store,c15store,c16store,c22store, &
- c23store,c24store,c25store,c26store,c33store,c34store,c35store, &
- c36store,c44store,c45store,c46store,c55store,c56store,c66store, &
- ibool,ispec_is_tiso, &
- ! --idoubling,
- R_memory,epsilondev,epsilon_trace_over_3,one_minus_sum_beta, &
- alphaval,betaval,gammaval,factor_common,vx,vy,vz,vnspec)
+ subroutine compute_forces_crust_mantle_Dev( displ_crust_mantle,accel_crust_mantle, &
+ phase_is_inner, &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+ epsilondev_xz,epsilondev_yz, &
+ epsilon_trace_over_3, &
+ alphaval,betaval,gammaval, &
+ factor_common,vx,vy,vz,vnspec)
! this routine is optimized for NGLLX = NGLLY = NGLLZ = 5 using the Deville et al. (2002) inlined matrix-matrix products
- implicit none
+ use constants
- include "constants.h"
+ use specfem_par,only: &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT,wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
+ minus_gravity_table,density_table,minus_deriv_gravity_table, &
+ myrank,iproc_xi,iproc_eta,ichunk,addressing, &
+ iprocfrom_faces,iprocto_faces, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ npoin2D_max_all_CM_IC,INCLUDE_CENTRAL_CUBE
- ! include values created by the mesher
- ! done for performance only using static allocation to allow for loop unrolling
- include "OUTPUT_FILES/values_from_mesher.h"
+ use specfem_par_crustmantle,only: &
+ xstore => xstore_crust_mantle,ystore => ystore_crust_mantle,zstore => zstore_crust_mantle, &
+ xix => xix_crust_mantle,xiy => xiy_crust_mantle,xiz => xiz_crust_mantle, &
+ etax => etax_crust_mantle,etay => etay_crust_mantle,etaz => etaz_crust_mantle, &
+ gammax => gammax_crust_mantle,gammay => gammay_crust_mantle,gammaz => gammaz_crust_mantle, &
+ kappavstore => kappavstore_crust_mantle,kappahstore => kappahstore_crust_mantle, &
+ muvstore => muvstore_crust_mantle,muhstore => muhstore_crust_mantle, &
+ eta_anisostore => eta_anisostore_crust_mantle, &
+ c11store => c11store_crust_mantle,c12store => c12store_crust_mantle,c13store => c13store_crust_mantle, &
+ c14store => c14store_crust_mantle,c15store => c15store_crust_mantle,c16store => c16store_crust_mantle, &
+ c22store => c22store_crust_mantle,c23store => c23store_crust_mantle,c24store => c24store_crust_mantle, &
+ c25store => c25store_crust_mantle,c26store => c26store_crust_mantle,c33store => c33store_crust_mantle, &
+ c34store => c34store_crust_mantle,c35store => c35store_crust_mantle,c36store => c36store_crust_mantle, &
+ c44store => c44store_crust_mantle,c45store => c45store_crust_mantle,c46store => c46store_crust_mantle, &
+ c55store => c55store_crust_mantle,c56store => c56store_crust_mantle,c66store => c66store_crust_mantle, &
+ ibool => ibool_crust_mantle, &
+ ispec_is_tiso => ispec_is_tiso_crust_mantle, &
+ one_minus_sum_beta => one_minus_sum_beta_crust_mantle, &
+ iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+ npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+ phase_ispec_inner => phase_ispec_inner_crust_mantle, &
+ nspec_outer => nspec_outer_crust_mantle, &
+ nspec_inner => nspec_inner_crust_mantle
+ use specfem_par_innercore,only: &
+ ibool_inner_core,idoubling_inner_core, &
+ iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+ npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+ iboolfaces_inner_core,iboolcorner_inner_core, &
+ nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+ npoin2D_cube_from_slices, &
+ ibool_central_cube, &
+ receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC
+
+ implicit none
+
! displacement and acceleration
real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE) :: displ_crust_mantle,accel_crust_mantle
- ! arrays with mesh parameters per slice
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: ibool
- ! x y and z contain r theta and phi
- real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE) :: xstore,ystore,zstore
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
-
- ! array with derivatives of Lagrange polynomials and precalculated products
- double precision, dimension(NGLLX,NGLLY,NGLLZ) :: wgll_cube
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprimewgll_xx
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xxT,hprimewgll_xxT
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
- real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
-
- ! store anisotropic properties only where needed to save memory
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_TISO_MANTLE) :: &
- kappahstore,muhstore,eta_anisostore
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ISO_MANTLE) :: &
- kappavstore,muvstore
-
- ! arrays for full anisotropy only when needed
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ANISO_MANTLE) :: &
- c11store,c12store,c13store,c14store,c15store,c16store,c22store, &
- c23store,c24store,c25store,c26store,c33store,c34store,c35store, &
- c36store,c44store,c45store,c46store,c55store,c56store,c66store
-
! attenuation
! memory variables for attenuation
! memory variables R_ij are stored at the local rather than global level
! to allow for optimization of cache access by compiler
- real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUAT) :: R_memory
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: epsilondev
+! real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUAT) :: R_memory
+ real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUAT) :: &
+ R_xx,R_yy,R_xy,R_xz,R_yz
+
+! real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: epsilondev
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: &
+ epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz
+
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: epsilon_trace_over_3
integer :: vx,vy,vz,vnspec
! [alpha,beta,gamma]val reduced to N_SLS and factor_common to N_SLS*NUM_NODES
real(kind=CUSTOM_REAL), dimension(N_SLS, vx, vy, vz, vnspec) :: factor_common
- real(kind=CUSTOM_REAL), dimension(vx, vy, vz, vnspec) :: one_minus_sum_beta
real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval,betaval,gammaval
- ! array with the local to global mapping per slice
- logical, dimension(NSPEC_CRUST_MANTLE) :: ispec_is_tiso
+ ! inner/outer element run flag
+ logical :: phase_is_inner
- ! gravity
- double precision, dimension(NRAD_GRAVITY) :: minus_gravity_table,density_table,minus_deriv_gravity_table
+ ! local parameters
-! local parameters
! Deville
! manually inline the calls to the Deville et al. (2002) routines
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
@@ -181,72 +165,11 @@
! for gravity
real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: rho_s_H
- integer :: ispec
- integer :: i,j,k
- integer :: iglob1
+ integer :: ispec,i,j,k,iglob1
+! integer :: computed_elements
+ integer :: num_elements,ispec_p
+ integer :: iphase
-
-! this for non blocking MPI
- integer :: iphase,icall
-
- integer :: computed_elements
-
- logical, dimension(NSPEC_CRUST_MANTLE) :: is_on_a_slice_edge_crust_mantle
-
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE) :: accel_inner_core
-
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: ibool_inner_core
-
- integer, dimension(NSPEC_INNER_CORE) :: idoubling_inner_core
-
- integer :: ichunk,iproc_xi,iproc_eta,myrank
-
- integer, dimension(NCHUNKS_VAL,0:NPROC_XI_VAL-1,0:NPROC_ETA_VAL-1) :: addressing
-
- integer, dimension(NGLOB2DMAX_XMIN_XMAX_CM) :: iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle
- integer, dimension(NGLOB2DMAX_YMIN_YMAX_CM) :: iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle
-
- integer, dimension(NGLOB2DMAX_XMIN_XMAX_IC) :: iboolleft_xi_inner_core,iboolright_xi_inner_core
- integer, dimension(NGLOB2DMAX_YMIN_YMAX_IC) :: iboolleft_eta_inner_core,iboolright_eta_inner_core
-
- integer npoin2D_faces_crust_mantle(NUMFACES_SHARED)
- integer npoin2D_faces_inner_core(NUMFACES_SHARED)
-
- integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- npoin2D_xi_inner_core,npoin2D_eta_inner_core
-
-! communication pattern for faces between chunks
- integer, dimension(NUMMSGS_FACES_VAL) :: iprocfrom_faces,iprocto_faces
-
-! communication pattern for corners between chunks
- integer, dimension(NCORNERSCHUNKS_VAL) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
-
- integer, dimension(NGLOB1D_RADIAL_CM,NUMCORNERS_SHARED) :: iboolcorner_crust_mantle
- integer, dimension(NGLOB1D_RADIAL_IC,NUMCORNERS_SHARED) :: iboolcorner_inner_core
-
- integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED) :: iboolfaces_crust_mantle
- integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED) :: iboolfaces_inner_core
-
- integer :: npoin2D_max_all_CM_IC
- real(kind=CUSTOM_REAL), dimension(NDIM,npoin2D_max_all_CM_IC) :: buffer_send_faces,buffer_received_faces
-
-! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB1D_RADIAL_CM + NGLOB1D_RADIAL_IC) :: &
- buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector
-
-! for matching with central cube in inner core
- integer nb_msgs_theor_in_cube, npoin2D_cube_from_slices,iphase_CC
- integer, dimension(nb_msgs_theor_in_cube) :: sender_from_slices_to_cube
- double precision, dimension(npoin2D_cube_from_slices,NDIM) :: buffer_slices
- double precision, dimension(npoin2D_cube_from_slices,NDIM,nb_msgs_theor_in_cube) :: buffer_all_cube_from_slices
- integer, dimension(nb_msgs_theor_in_cube,npoin2D_cube_from_slices):: ibool_central_cube
- integer receiver_cube_from_slices
- logical :: INCLUDE_CENTRAL_CUBE
-
-! local to global mapping
- integer NSPEC2D_BOTTOM_INNER_CORE
- integer, dimension(NSPEC2D_BOTTOM_INNER_CORE) :: ibelm_bottom_inner_core
-
#ifdef _HANDOPT
integer, dimension(5) :: iglobv5
#endif
@@ -255,44 +178,21 @@
! big loop over all spectral elements in the solid
! ****************************************************
- computed_elements = 0
+! computed_elements = 0
+ if( .not. phase_is_inner ) then
+ iphase = 1
+ num_elements = nspec_outer
+ else
+ iphase = 2
+ num_elements = nspec_inner
+ endif
- do ispec = 1,NSPEC_CRUST_MANTLE
+ do ispec_p = 1,num_elements
-! hide communications by computing the edges first
- if((icall == 2 .and. is_on_a_slice_edge_crust_mantle(ispec)) .or. &
- (icall == 1 .and. .not. is_on_a_slice_edge_crust_mantle(ispec))) cycle
+ ispec = phase_ispec_inner(ispec_p,iphase)
-! process the communications every ELEMENTS_NONBLOCKING elements
- computed_elements = computed_elements + 1
- if (USE_NONBLOCKING_COMMS .and. icall == 2 .and. mod(computed_elements,ELEMENTS_NONBLOCKING_CM_IC) == 0) then
+ ! only compute element which belong to current phase (inner or outer elements)
- if(iphase <= 7) call assemble_MPI_vector(myrank,accel_crust_mantle,accel_inner_core, &
- iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector, &
- NUMMSGS_FACES_VAL,NCORNERSCHUNKS_VAL, &
- NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL_CM, &
- NGLOB1D_RADIAL_IC,NCHUNKS_VAL,iphase)
-
- if(INCLUDE_CENTRAL_CUBE) then
- if(iphase > 7 .and. iphase_CC <= 4) &
- call assemble_MPI_central_cube(ichunk,nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
- npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
- receiver_cube_from_slices,ibool_inner_core,idoubling_inner_core, &
- ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,accel_inner_core,NDIM,iphase_CC)
- endif
-
- endif
-
! subroutines adapted from Deville, Fischer and Mund, High-order methods
! for incompressible fluid flow, Cambridge University Press (2002),
! pages 386 and 389 and Figure 8.3.1
@@ -416,7 +316,8 @@
c23store,c24store,c25store,c26store,c33store,c34store,c35store, &
c36store,c44store,c45store,c46store,c55store,c56store,c66store, &
ibool, &
- R_memory,epsilon_trace_over_3, &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ epsilon_trace_over_3, &
one_minus_sum_beta,vx,vy,vz,vnspec, &
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
dummyx_loc,dummyy_loc,dummyz_loc,epsilondev_loc,rho_s_H)
@@ -430,7 +331,8 @@
wgll_cube, &
kappavstore,muvstore, &
ibool, &
- R_memory,epsilon_trace_over_3, &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ epsilon_trace_over_3, &
one_minus_sum_beta,vx,vy,vz,vnspec, &
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
dummyx_loc,dummyy_loc,dummyz_loc,epsilondev_loc,rho_s_H)
@@ -443,7 +345,8 @@
wgll_cube, &
kappavstore,kappahstore,muvstore,muhstore,eta_anisostore, &
ibool, &
- R_memory,epsilon_trace_over_3, &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ epsilon_trace_over_3, &
one_minus_sum_beta,vx,vy,vz,vnspec, &
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
dummyx_loc,dummyy_loc,dummyz_loc,epsilondev_loc,rho_s_H)
@@ -581,17 +484,23 @@
if(ATTENUATION_VAL .and. ( USE_ATTENUATION_MIMIC .eqv. .false. ) ) then
! updates R_memory
- call compute_element_att_memory_cr(ispec,R_memory, &
+ call compute_element_att_memory_cr(ispec,R_xx,R_yy,R_xy,R_xz,R_yz, &
vx,vy,vz,vnspec,factor_common, &
alphaval,betaval,gammaval, &
c44store,muvstore, &
- epsilondev,epsilondev_loc)
+ epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+ epsilondev_xz,epsilondev_yz, &
+ epsilondev_loc)
endif
! save deviatoric strain for Runge-Kutta scheme
if(COMPUTE_AND_STORE_STRAIN) then
- epsilondev(:,:,:,:,ispec) = epsilondev_loc(:,:,:,:)
+ epsilondev_xx(:,:,:,ispec) = epsilondev_loc(1,:,:,:)
+ epsilondev_yy(:,:,:,ispec) = epsilondev_loc(2,:,:,:)
+ epsilondev_xy(:,:,:,ispec) = epsilondev_loc(3,:,:,:)
+ epsilondev_xz(:,:,:,ispec) = epsilondev_loc(4,:,:,:)
+ epsilondev_yz(:,:,:,ispec) = epsilondev_loc(5,:,:,:)
endif
enddo ! spectral element loop NSPEC_CRUST_MANTLE
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90 (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -0,0 +1,767 @@
+!=====================================================================
+!
+! S p e c f e m 3 D G l o b e V e r s i o n 5 . 1
+! --------------------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA
+! and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+ subroutine compute_forces_elastic()
+
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore,only: accel_outer_core,b_accel_outer_core, &
+ normal_top_outer_core,jacobian2D_top_outer_core, &
+ normal_bottom_outer_core,jacobian2D_bottom_outer_core, &
+ ibelm_top_outer_core,ibelm_bottom_outer_core, &
+ ibool_outer_core
+ use specfem_par_movie
+ implicit none
+
+ ! local parameters
+ ! non blocking MPI
+ ! iphase: iphase = 1 is for computing outer elements in the crust_mantle and inner_core regions,
+ ! iphase = 2 is for computing inner elements (former icall parameter)
+ integer :: iphase
+ logical :: phase_is_inner
+
+ ! ****************************************************
+ ! big loop over all spectral elements in the solid
+ ! ****************************************************
+
+ ! compute internal forces in the solid regions
+
+ ! for anisotropy and gravity, x y and z contain r theta and phi
+
+ ! distinguishes two runs: for elements on MPI interfaces, and elements within the partitions
+ do iphase = 1,2
+
+ ! first, iphase == 1 for points on MPI interfaces (thus outer elements)
+ ! second, iphase == 2 for points purely inside partition (thus inner elements)
+ !
+ ! compute all the outer elements first, then sends out non blocking MPI communication
+ ! and continues computing inner elements (overlapping)
+ if( iphase == 1 ) then
+ phase_is_inner = .false.
+ else
+ phase_is_inner = .true.
+ endif
+
+
+ if( .NOT. GPU_MODE ) then
+ ! on CPU
+
+ ! compute internal forces in the solid regions
+ ! note: for anisotropy and gravity, x y and z contain r theta and phi
+ if( USE_DEVILLE_PRODUCTS_VAL ) then
+ ! uses Deville (2002) optimizations
+ ! crust/mantle region
+ call compute_forces_crust_mantle_Dev( displ_crust_mantle,accel_crust_mantle, &
+ phase_is_inner, &
+ R_xx_crust_mantle,R_yy_crust_mantle,R_xy_crust_mantle, &
+ R_xz_crust_mantle,R_yz_crust_mantle, &
+ epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
+ epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle, &
+ eps_trace_over_3_crust_mantle, &
+ alphaval,betaval,gammaval,factor_common_crust_mantle, &
+ size(factor_common_crust_mantle,2), size(factor_common_crust_mantle,3), &
+ size(factor_common_crust_mantle,4), size(factor_common_crust_mantle,5) )
+ ! inner core region
+ call compute_forces_inner_core_Dev( displ_inner_core,accel_inner_core, &
+ phase_is_inner, &
+ R_xx_inner_core,R_yy_inner_core,R_xy_inner_core,R_xz_inner_core,R_yz_inner_core, &
+ epsilondev_xx_inner_core,epsilondev_yy_inner_core,epsilondev_xy_inner_core, &
+ epsilondev_xz_inner_core,epsilondev_yz_inner_core, &
+ eps_trace_over_3_inner_core,&
+ alphaval,betaval,gammaval, &
+ factor_common_inner_core, &
+ size(factor_common_inner_core,2), size(factor_common_inner_core,3), &
+ size(factor_common_inner_core,4), size(factor_common_inner_core,5) )
+
+ else
+ ! no Deville optimization
+ ! crust/mantle region
+ call compute_forces_crust_mantle( displ_crust_mantle,accel_crust_mantle, &
+ phase_is_inner, &
+ R_xx_crust_mantle,R_yy_crust_mantle,R_xy_crust_mantle, &
+ R_xz_crust_mantle,R_yz_crust_mantle, &
+ epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
+ epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle, &
+ eps_trace_over_3_crust_mantle, &
+ alphaval,betaval,gammaval,factor_common_crust_mantle, &
+ size(factor_common_crust_mantle,2), size(factor_common_crust_mantle,3), &
+ size(factor_common_crust_mantle,4), size(factor_common_crust_mantle,5) )
+ ! inner core region
+ call compute_forces_inner_core( displ_inner_core,accel_inner_core, &
+ phase_is_inner, &
+ R_xx_inner_core,R_yy_inner_core,R_xy_inner_core,R_xz_inner_core,R_yz_inner_core, &
+ epsilondev_xx_inner_core,epsilondev_yy_inner_core,epsilondev_xy_inner_core, &
+ epsilondev_xz_inner_core,epsilondev_yz_inner_core, &
+ eps_trace_over_3_inner_core,&
+ alphaval,betaval,gammaval, &
+ factor_common_inner_core, &
+ size(factor_common_inner_core,2), size(factor_common_inner_core,3), &
+ size(factor_common_inner_core,4), size(factor_common_inner_core,5) )
+
+ endif
+
+ ! adjoint / kernel runs
+ if (SIMULATION_TYPE == 3 ) then
+ if( USE_DEVILLE_PRODUCTS_VAL ) then
+ ! uses Deville (2002) optimizations
+ ! crust/mantle region
+ call compute_forces_crust_mantle_Dev( b_displ_crust_mantle,b_accel_crust_mantle, &
+ phase_is_inner, &
+ b_R_xx_crust_mantle,b_R_yy_crust_mantle,b_R_xy_crust_mantle, &
+ b_R_xz_crust_mantle,b_R_yz_crust_mantle, &
+ b_epsilondev_xx_crust_mantle,b_epsilondev_yy_crust_mantle,&
+ b_epsilondev_xy_crust_mantle, &
+ b_epsilondev_xz_crust_mantle,b_epsilondev_yz_crust_mantle, &
+ b_eps_trace_over_3_crust_mantle, &
+ b_alphaval,b_betaval,b_gammaval,factor_common_crust_mantle, &
+ size(factor_common_crust_mantle,2), size(factor_common_crust_mantle,3), &
+ size(factor_common_crust_mantle,4), size(factor_common_crust_mantle,5) )
+ ! inner core region
+ call compute_forces_inner_core_Dev( b_displ_inner_core,b_accel_inner_core, &
+ phase_is_inner, &
+ b_R_xx_inner_core,b_R_yy_inner_core,b_R_xy_inner_core, &
+ b_R_xz_inner_core,b_R_yz_inner_core, &
+ b_epsilondev_xx_inner_core,b_epsilondev_yy_inner_core,b_epsilondev_xy_inner_core, &
+ b_epsilondev_xz_inner_core,b_epsilondev_yz_inner_core, &
+ b_eps_trace_over_3_inner_core,&
+ b_alphaval,b_betaval,b_gammaval, &
+ factor_common_inner_core, &
+ size(factor_common_inner_core,2), size(factor_common_inner_core,3), &
+ size(factor_common_inner_core,4), size(factor_common_inner_core,5) )
+
+ else
+ ! no Deville optimization
+ ! crust/mantle region
+ call compute_forces_crust_mantle( b_displ_crust_mantle,b_accel_crust_mantle, &
+ phase_is_inner, &
+ b_R_xx_crust_mantle,b_R_yy_crust_mantle,b_R_xy_crust_mantle, &
+ b_R_xz_crust_mantle,b_R_yz_crust_mantle, &
+ b_epsilondev_xx_crust_mantle,b_epsilondev_yy_crust_mantle,&
+ b_epsilondev_xy_crust_mantle, &
+ b_epsilondev_xz_crust_mantle,b_epsilondev_yz_crust_mantle, &
+ b_eps_trace_over_3_crust_mantle, &
+ b_alphaval,b_betaval,b_gammaval,factor_common_crust_mantle, &
+ size(factor_common_crust_mantle,2), size(factor_common_crust_mantle,3), &
+ size(factor_common_crust_mantle,4), size(factor_common_crust_mantle,5) )
+
+ ! inner core region
+ call compute_forces_inner_core( b_displ_inner_core,b_accel_inner_core, &
+ phase_is_inner, &
+ b_R_xx_inner_core,b_R_yy_inner_core,b_R_xy_inner_core, &
+ b_R_xz_inner_core,b_R_yz_inner_core, &
+ b_epsilondev_xx_inner_core,b_epsilondev_yy_inner_core,b_epsilondev_xy_inner_core, &
+ b_epsilondev_xz_inner_core,b_epsilondev_yz_inner_core, &
+ b_eps_trace_over_3_inner_core,&
+ b_alphaval,b_betaval,b_gammaval, &
+ factor_common_inner_core, &
+ size(factor_common_inner_core,2), size(factor_common_inner_core,3), &
+ size(factor_common_inner_core,4), size(factor_common_inner_core,5) )
+ endif
+ endif !SIMULATION_TYPE == 3
+
+ else
+ ! on GPU
+ call load_GPU_elastic()
+
+ ! contains both forward SIM_TYPE==1 and backward SIM_TYPE==3 simulations
+ ! for crust/mantle
+ call compute_forces_crust_mantle_cuda(Mesh_pointer,iphase)
+ ! for inner core
+ call compute_forces_inner_core_cuda(Mesh_pointer,iphase)
+
+ call load_CPU_elastic()
+ endif ! GPU_MODE
+
+
+
+ ! computes additional contributions to acceleration field
+ if( iphase == 1 ) then
+
+ ! absorbing boundaries
+ ! Stacey
+ if(NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS) call compute_stacey_crust_mantle()
+
+ ! add the sources
+ if (SIMULATION_TYPE == 1) &
+ call compute_add_sources(myrank,NSOURCES, &
+ accel_crust_mantle,sourcearrays, &
+ DT,t0,tshift_cmt,hdur_gaussian,ibool_crust_mantle, &
+ islice_selected_source,ispec_selected_source,it, &
+ hdur,xi_source,eta_source,gamma_source,nu_source)
+
+ ! add adjoint sources
+ if (SIMULATION_TYPE == 2 .or. SIMULATION_TYPE == 3) then
+ if( nadj_rec_local > 0 ) &
+ call compute_add_sources_adjoint(myrank,nrec, &
+ nadj_rec_local,NSTEP,NTSTEP_BETWEEN_READ_ADJSRC, &
+ accel_crust_mantle,adj_sourcearrays, &
+ nu,xi_receiver,eta_receiver,gamma_receiver, &
+ xigll,yigll,zigll,ibool_crust_mantle, &
+ islice_selected_rec,ispec_selected_rec, &
+ NSTEP_SUB_ADJ,iadjsrc_len,iadjsrc,iadj_vec, &
+ it,it_begin,station_name,network_name,DT)
+ endif
+
+ ! add sources for backward/reconstructed wavefield
+ if (SIMULATION_TYPE == 3) &
+ call compute_add_sources_backward(myrank,NSOURCES,NSTEP, &
+ b_accel_crust_mantle,sourcearrays, &
+ DT,t0,tshift_cmt,hdur_gaussian,ibool_crust_mantle, &
+ islice_selected_source,ispec_selected_source,it, &
+ hdur,xi_source,eta_source,gamma_source,nu_source)
+
+
+ ! NOISE_TOMOGRAPHY
+ if ( NOISE_TOMOGRAPHY == 1 ) then
+ ! the first step of noise tomography is to use |S(\omega)|^2 as a point force source at one of the receivers.
+ ! hence, instead of a moment tensor 'sourcearrays', a 'noise_sourcearray' for a point force is needed.
+ ! furthermore, the CMTSOLUTION needs to be zero, i.e., no earthquakes.
+ ! now this must be manually set in DATA/CMTSOLUTION, by USERS.
+ call add_source_master_rec_noise(myrank,nrec, &
+ NSTEP,accel_crust_mantle,noise_sourcearray, &
+ ibool_crust_mantle,islice_selected_rec,ispec_selected_rec, &
+ it,irec_master_noise)
+ elseif ( NOISE_TOMOGRAPHY == 2 ) then
+ ! second step of noise tomography, i.e., read the surface movie saved at every timestep
+ ! use the movie to drive the ensemble forward wavefield
+ call noise_read_add_surface_movie(nmovie_points,accel_crust_mantle, &
+ normal_x_noise,normal_y_noise,normal_z_noise,mask_noise, &
+ ibelm_top_crust_mantle,ibool_crust_mantle, &
+ NSPEC2D_TOP(IREGION_CRUST_MANTLE),noise_surface_movie, &
+ NSTEP-it+1,jacobian2D_top_crust_mantle,wgllwgll_xy)
+ ! be careful, since ensemble forward sources are reversals of generating wavefield "eta"
+ ! hence the "NSTEP-it+1", i.e., start reading from the last timestep
+ ! note the ensemble forward sources are generally distributed on the surface of the earth
+ ! that's to say, the ensemble forward source is kind of a surface force density, not a body force density
+ ! therefore, we must add it here, before applying the inverse of mass matrix
+ elseif ( NOISE_TOMOGRAPHY == 3 ) then
+ ! third step of noise tomography, i.e., read the surface movie saved at every timestep
+ ! use the movie to reconstruct the ensemble forward wavefield
+ ! the ensemble adjoint wavefield is done as usual
+ ! note instead of "NSTEP-it+1", now we use "it", since reconstruction is a reversal of reversal
+ call noise_read_add_surface_movie(nmovie_points,b_accel_crust_mantle, &
+ normal_x_noise,normal_y_noise,normal_z_noise,mask_noise, &
+ ibelm_top_crust_mantle,ibool_crust_mantle, &
+ NSPEC2D_TOP(IREGION_CRUST_MANTLE),noise_surface_movie, &
+ it,jacobian2D_top_crust_mantle,wgllwgll_xy)
+ endif
+
+
+ ! ****************************************************
+ ! ********** add matching with fluid part **********
+ ! ****************************************************
+
+ ! only for elements in first matching layer in the solid
+
+ !---
+ !--- couple with outer core at the bottom of the mantle
+ !---
+ if(ACTUALLY_COUPLE_FLUID_CMB) &
+ call compute_coupling_CMB_fluid(displ_crust_mantle,b_displ_crust_mantle, &
+ accel_crust_mantle,b_accel_crust_mantle, &
+ ibool_crust_mantle,ibelm_bottom_crust_mantle, &
+ accel_outer_core,b_accel_outer_core, &
+ normal_top_outer_core,jacobian2D_top_outer_core, &
+ wgllwgll_xy,ibool_outer_core,ibelm_top_outer_core, &
+ RHO_TOP_OC,minus_g_cmb, &
+ SIMULATION_TYPE,NSPEC2D_BOTTOM(IREGION_CRUST_MANTLE))
+
+ !---
+ !--- couple with outer core at the top of the inner core
+ !---
+ if(ACTUALLY_COUPLE_FLUID_ICB) &
+ call compute_coupling_ICB_fluid(displ_inner_core,b_displ_inner_core, &
+ accel_inner_core,b_accel_inner_core, &
+ ibool_inner_core,ibelm_top_inner_core, &
+ accel_outer_core,b_accel_outer_core, &
+ normal_bottom_outer_core,jacobian2D_bottom_outer_core, &
+ wgllwgll_xy,ibool_outer_core,ibelm_bottom_outer_core, &
+ RHO_BOTTOM_OC,minus_g_icb, &
+ SIMULATION_TYPE,NSPEC2D_TOP(IREGION_INNER_CORE))
+
+ endif ! iphase == 1
+
+
+ ! assemble all the contributions between slices using MPI
+
+ ! crust/mantle and inner core handled in the same call
+ ! in order to reduce the number of MPI messages by 2
+
+ if( iphase == 1 ) then
+ ! sends out MPI interface data
+ if(.NOT. GPU_MODE) then
+ ! on CPU
+ ! sends accel values to corresponding MPI interface neighbors
+ ! crust mantle
+ call assemble_MPI_vector_ext_mesh_s(NPROCTOT_VAL,NGLOB_CRUST_MANTLE, &
+ accel_crust_mantle, &
+ buffer_send_vector_crust_mantle,buffer_recv_vector_crust_mantle, &
+ num_interfaces_crust_mantle,max_nibool_interfaces_crust_mantle, &
+ nibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle,&
+ my_neighbours_crust_mantle, &
+ request_send_vector_crust_mantle,request_recv_vector_crust_mantle)
+ ! inner core
+ call assemble_MPI_vector_ext_mesh_s(NPROCTOT_VAL,NGLOB_INNER_CORE, &
+ accel_inner_core, &
+ buffer_send_vector_inner_core,buffer_recv_vector_inner_core, &
+ num_interfaces_inner_core,max_nibool_interfaces_inner_core, &
+ nibool_interfaces_inner_core,ibool_interfaces_inner_core,&
+ my_neighbours_inner_core, &
+ request_send_vector_inner_core,request_recv_vector_inner_core)
+ else
+ ! on GPU
+ call load_GPU_elastic()
+ ! crust mantle
+ call assemble_MPI_vector_send_cuda(NPROCTOT_VAL, &
+ buffer_send_vector_crust_mantle,buffer_recv_vector_crust_mantle, &
+ num_interfaces_crust_mantle,max_nibool_interfaces_crust_mantle, &
+ nibool_interfaces_crust_mantle,&
+ my_neighbours_crust_mantle, &
+ request_send_vector_crust_mantle,request_recv_vector_crust_mantle, &
+ IREGION_CRUST_MANTLE, &
+ 1) ! <-- 1 == fwd accel
+ ! inner core
+ call assemble_MPI_vector_send_cuda(NPROCTOT_VAL, &
+ buffer_send_vector_inner_core,buffer_recv_vector_inner_core, &
+ num_interfaces_inner_core,max_nibool_interfaces_inner_core, &
+ nibool_interfaces_inner_core,&
+ my_neighbours_inner_core, &
+ request_send_vector_inner_core,request_recv_vector_inner_core, &
+ IREGION_INNER_CORE, &
+ 1)
+ endif ! GPU_MODE
+
+ ! adjoint / kernel runs
+ if (SIMULATION_TYPE == 3) then
+ if(.NOT. GPU_MODE) then
+ ! on CPU
+ ! sends accel values to corresponding MPI interface neighbors
+ ! crust mantle
+ call assemble_MPI_vector_ext_mesh_s(NPROCTOT_VAL,NGLOB_CRUST_MANTLE, &
+ b_accel_crust_mantle, &
+ b_buffer_send_vector_crust_mantle,b_buffer_recv_vector_crust_mantle, &
+ num_interfaces_crust_mantle,max_nibool_interfaces_crust_mantle, &
+ nibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle,&
+ my_neighbours_crust_mantle, &
+ b_request_send_vector_crust_mantle,b_request_recv_vector_crust_mantle)
+ ! inner core
+ call assemble_MPI_vector_ext_mesh_s(NPROCTOT_VAL,NGLOB_INNER_CORE, &
+ b_accel_inner_core, &
+ b_buffer_send_vector_inner_core,b_buffer_recv_vector_inner_core, &
+ num_interfaces_inner_core,max_nibool_interfaces_inner_core, &
+ nibool_interfaces_inner_core,ibool_interfaces_inner_core,&
+ my_neighbours_inner_core, &
+ b_request_send_vector_inner_core,b_request_recv_vector_inner_core)
+ else
+ ! on GPU
+ call load_GPU_elastic()
+ ! crust mantle
+ call assemble_MPI_vector_send_cuda(NPROCTOT_VAL, &
+ b_buffer_send_vector_crust_mantle,b_buffer_recv_vector_crust_mantle, &
+ num_interfaces_crust_mantle,max_nibool_interfaces_crust_mantle, &
+ nibool_interfaces_crust_mantle,&
+ my_neighbours_crust_mantle, &
+ b_request_send_vector_crust_mantle,b_request_recv_vector_crust_mantle, &
+ IREGION_CRUST_MANTLE, &
+ 3) ! <-- 3 == adjoint b_accel
+ ! inner core
+ call assemble_MPI_vector_send_cuda(NPROCTOT_VAL, &
+ b_buffer_send_vector_inner_core,b_buffer_recv_vector_inner_core, &
+ num_interfaces_inner_core,max_nibool_interfaces_inner_core, &
+ nibool_interfaces_inner_core,&
+ my_neighbours_inner_core, &
+ b_request_send_vector_inner_core,b_request_recv_vector_inner_core, &
+ IREGION_INNER_CORE, &
+ 3)
+ endif ! GPU
+ endif ! SIMULATION_TYPE == 3
+
+ else
+ ! waits for send/receive requests to be completed and assembles values
+ if(.NOT. GPU_MODE) then
+ ! on CPU
+ ! crust mantle
+ call assemble_MPI_vector_ext_mesh_w(NPROCTOT_VAL,NGLOB_CRUST_MANTLE, &
+ accel_crust_mantle, &
+ buffer_recv_vector_crust_mantle,num_interfaces_crust_mantle,&
+ max_nibool_interfaces_crust_mantle, &
+ nibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle, &
+ request_send_vector_crust_mantle,request_recv_vector_crust_mantle)
+ ! inner core
+ call assemble_MPI_vector_ext_mesh_w(NPROCTOT_VAL,NGLOB_INNER_CORE, &
+ accel_inner_core, &
+ buffer_recv_vector_inner_core,num_interfaces_inner_core,&
+ max_nibool_interfaces_inner_core, &
+ nibool_interfaces_inner_core,ibool_interfaces_inner_core, &
+ request_send_vector_inner_core,request_recv_vector_inner_core)
+ else
+ ! on GPU
+ ! crust mantle
+ call assemble_MPI_vector_write_cuda(NPROCTOT_VAL, &
+ buffer_recv_vector_crust_mantle, &
+ num_interfaces_crust_mantle,max_nibool_interfaces_crust_mantle, &
+ request_send_vector_crust_mantle,request_recv_vector_crust_mantle, &
+ IREGION_CRUST_MANTLE, &
+ 1) ! <-- 1 == fwd accel
+ ! inner core
+ call assemble_MPI_vector_write_cuda(NPROCTOT_VAL, &
+ buffer_recv_vector_inner_core, &
+ num_interfaces_inner_core,max_nibool_interfaces_inner_core, &
+ request_send_vector_inner_core,request_recv_vector_inner_core, &
+ IREGION_INNER_CORE, &
+ 1)
+ call load_CPU_elastic()
+ endif
+
+
+ ! adjoint / kernel runs
+ if (SIMULATION_TYPE == 3) then
+ ! waits for send/receive requests to be completed and assembles values
+ if(.NOT. GPU_MODE) then
+ ! on CPU
+ ! crust mantle
+ call assemble_MPI_vector_ext_mesh_w(NPROCTOT_VAL,NGLOB_CRUST_MANTLE, &
+ b_accel_crust_mantle, &
+ b_buffer_recv_vector_crust_mantle,num_interfaces_crust_mantle,&
+ max_nibool_interfaces_crust_mantle, &
+ nibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle, &
+ b_request_send_vector_crust_mantle,b_request_recv_vector_crust_mantle)
+ ! inner core
+ call assemble_MPI_vector_ext_mesh_w(NPROCTOT_VAL,NGLOB_INNER_CORE, &
+ b_accel_inner_core, &
+ b_buffer_recv_vector_inner_core,num_interfaces_inner_core,&
+ max_nibool_interfaces_inner_core, &
+ nibool_interfaces_inner_core,ibool_interfaces_inner_core, &
+ b_request_send_vector_inner_core,b_request_recv_vector_inner_core)
+
+ else
+ ! on GPU
+ ! crust mantle
+ call assemble_MPI_vector_write_cuda(NPROCTOT_VAL, &
+ b_buffer_recv_vector_crust_mantle, &
+ num_interfaces_crust_mantle,max_nibool_interfaces_crust_mantle, &
+ b_request_send_vector_crust_mantle,b_request_recv_vector_crust_mantle, &
+ IREGION_CRUST_MANTLE, &
+ 3) ! <-- 3 == adjoint b_accel
+ ! inner core
+ call assemble_MPI_vector_write_cuda(NPROCTOT_VAL,&
+ b_buffer_recv_vector_inner_core, &
+ num_interfaces_inner_core,max_nibool_interfaces_inner_core, &
+ b_request_send_vector_inner_core,b_request_recv_vector_inner_core, &
+ IREGION_INNER_CORE, &
+ 3)
+ call load_CPU_elastic()
+ endif
+ endif ! SIMULATION_TYPE == 3
+ endif ! iphase == 1
+
+ enddo ! iphase
+
+ ! updates (only) acceleration w/ rotation in the crust/mantle region (touches oceans)
+ if(.NOT. GPU_MODE) then
+ ! on CPU
+ call compute_forces_el_update_accel(veloc_crust_mantle,accel_crust_mantle, &
+ two_omega_earth,rmass_crust_mantle)
+ ! adjoint / kernel runs
+ if (SIMULATION_TYPE == 3) &
+ call compute_forces_el_update_accel(b_veloc_crust_mantle,b_accel_crust_mantle, &
+ b_two_omega_earth,rmass_crust_mantle)
+ else
+ ! on GPU
+ call load_GPU_elastic()
+ call kernel_3_a_cuda(Mesh_pointer, &
+ deltatover2,SIMULATION_TYPE,b_deltatover2,OCEANS_VAL)
+ call load_CPU_elastic()
+ endif
+
+ ! couples ocean with crust mantle
+ ! (updates acceleration with ocean load approximation)
+ if(OCEANS_VAL) &
+ call compute_coupling_ocean(accel_crust_mantle,b_accel_crust_mantle, &
+ rmass_crust_mantle,rmass_ocean_load,normal_top_crust_mantle, &
+ ibool_crust_mantle,ibelm_top_crust_mantle, &
+ updated_dof_ocean_load, &
+ SIMULATION_TYPE,NSPEC2D_TOP(IREGION_CRUST_MANTLE))
+
+ ! Newmark time scheme:
+ ! corrector terms for elastic parts
+ ! (updates velocity)
+ if(.NOT. GPU_MODE ) then
+ ! on CPU
+ call compute_forces_el_update_veloc(veloc_crust_mantle,accel_crust_mantle, &
+ veloc_inner_core,accel_inner_core, &
+ deltatover2,two_omega_earth,rmass_inner_core)
+ ! adjoint / kernel runs
+ if (SIMULATION_TYPE == 3) &
+ call compute_forces_el_update_veloc(b_veloc_crust_mantle,b_accel_crust_mantle, &
+ b_veloc_inner_core,b_accel_inner_core, &
+ b_deltatover2,b_two_omega_earth,rmass_inner_core)
+ else
+ ! on GPU
+ call load_GPU_elastic()
+ call kernel_3_b_cuda(Mesh_pointer, &
+ deltatover2,SIMULATION_TYPE,b_deltatover2,OCEANS_VAL)
+ call load_CPU_elastic()
+ endif
+
+ end subroutine compute_forces_elastic
+
+
+!=====================================================================
+
+ subroutine compute_forces_el_update_accel(veloc_crust_mantle,accel_crust_mantle, &
+ two_omega_earth,rmass_crust_mantle)
+
+! Purpose: finalizes the acceleration in the crust/mantle region after the
+! stiffness/coupling contributions have been assembled: scales each global
+! point by its rmass_crust_mantle entry and adds the Earth-rotation coupling
+! between the x- and y-components (the z-component is unaffected).
+! NOTE(review): the multiply below assumes rmass_crust_mantle stores the
+! *inverse* of the assembled mass matrix - confirm against the mesher setup.
+! two_omega_earth is the rotation-rate factor (2*Omega); presumably zero
+! when rotation is disabled - confirm against the solver setup.
+
+ use specfem_par,only: CUSTOM_REAL,NGLOB_CRUST_MANTLE,NDIM
+
+#ifdef _HANDOPT
+ use specfem_par,only: imodulo_NGLOB_CRUST_MANTLE4
+#endif
+
+ implicit none
+
+ ! velocity & acceleration
+ ! crust/mantle region
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE) :: veloc_crust_mantle,accel_crust_mantle
+
+ ! mass matrix
+ real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE) :: rmass_crust_mantle
+
+ ! rotation factor added to the x/y acceleration components
+ real(kind=CUSTOM_REAL) :: two_omega_earth
+
+ ! local parameters
+ integer :: i
+
+ ! updates acceleration w/ rotation in crust/mantle region only
+
+#ifdef _HANDOPT_NEWMARK
+! way 2:
+! hand-optimized variant: the main loop is manually unrolled by 4; the
+! remainder (NGLOB_CRUST_MANTLE mod 4, precomputed in
+! imodulo_NGLOB_CRUST_MANTLE4) is handled first so the unrolled loop
+! below runs on a multiple-of-4 trip count
+ if(imodulo_NGLOB_CRUST_MANTLE4 >= 1) then
+ do i=1,imodulo_NGLOB_CRUST_MANTLE4
+ accel_crust_mantle(1,i) = accel_crust_mantle(1,i)*rmass_crust_mantle(i) &
+ + two_omega_earth*veloc_crust_mantle(2,i)
+ accel_crust_mantle(2,i) = accel_crust_mantle(2,i)*rmass_crust_mantle(i) &
+ - two_omega_earth*veloc_crust_mantle(1,i)
+ accel_crust_mantle(3,i) = accel_crust_mantle(3,i)*rmass_crust_mantle(i)
+ enddo
+ endif
+ do i=imodulo_NGLOB_CRUST_MANTLE4+1,NGLOB_CRUST_MANTLE,4
+ accel_crust_mantle(1,i) = accel_crust_mantle(1,i)*rmass_crust_mantle(i) &
+ + two_omega_earth*veloc_crust_mantle(2,i)
+ accel_crust_mantle(2,i) = accel_crust_mantle(2,i)*rmass_crust_mantle(i) &
+ - two_omega_earth*veloc_crust_mantle(1,i)
+ accel_crust_mantle(3,i) = accel_crust_mantle(3,i)*rmass_crust_mantle(i)
+
+ accel_crust_mantle(1,i+1) = accel_crust_mantle(1,i+1)*rmass_crust_mantle(i+1) &
+ + two_omega_earth*veloc_crust_mantle(2,i+1)
+ accel_crust_mantle(2,i+1) = accel_crust_mantle(2,i+1)*rmass_crust_mantle(i+1) &
+ - two_omega_earth*veloc_crust_mantle(1,i+1)
+ accel_crust_mantle(3,i+1) = accel_crust_mantle(3,i+1)*rmass_crust_mantle(i+1)
+
+ accel_crust_mantle(1,i+2) = accel_crust_mantle(1,i+2)*rmass_crust_mantle(i+2) &
+ + two_omega_earth*veloc_crust_mantle(2,i+2)
+ accel_crust_mantle(2,i+2) = accel_crust_mantle(2,i+2)*rmass_crust_mantle(i+2) &
+ - two_omega_earth*veloc_crust_mantle(1,i+2)
+ accel_crust_mantle(3,i+2) = accel_crust_mantle(3,i+2)*rmass_crust_mantle(i+2)
+
+ accel_crust_mantle(1,i+3) = accel_crust_mantle(1,i+3)*rmass_crust_mantle(i+3) &
+ + two_omega_earth*veloc_crust_mantle(2,i+3)
+ accel_crust_mantle(2,i+3) = accel_crust_mantle(2,i+3)*rmass_crust_mantle(i+3) &
+ - two_omega_earth*veloc_crust_mantle(1,i+3)
+ accel_crust_mantle(3,i+3) = accel_crust_mantle(3,i+3)*rmass_crust_mantle(i+3)
+ enddo
+#else
+! way 1:
+! plain (non-unrolled) version of the same update
+ do i=1,NGLOB_CRUST_MANTLE
+ accel_crust_mantle(1,i) = accel_crust_mantle(1,i)*rmass_crust_mantle(i) &
+ + two_omega_earth*veloc_crust_mantle(2,i)
+ accel_crust_mantle(2,i) = accel_crust_mantle(2,i)*rmass_crust_mantle(i) &
+ - two_omega_earth*veloc_crust_mantle(1,i)
+ accel_crust_mantle(3,i) = accel_crust_mantle(3,i)*rmass_crust_mantle(i)
+ enddo
+#endif
+
+ end subroutine compute_forces_el_update_accel
+
+
+!=====================================================================
+
+ subroutine compute_forces_el_update_veloc(veloc_crust_mantle,accel_crust_mantle, &
+ veloc_inner_core,accel_inner_core, &
+ deltatover2,two_omega_earth,rmass_inner_core)
+
+! Purpose: Newmark corrector step for the elastic regions.
+! - crust/mantle: velocity corrector only,
+!   veloc = veloc + deltatover2*accel
+!   (its acceleration was already finalized in compute_forces_el_update_accel)
+! - inner core: first finalizes the acceleration (mass-matrix scaling plus
+!   rotation coupling of the x/y components), then applies the same
+!   velocity corrector.
+! NOTE(review): the multiply below assumes rmass_inner_core stores the
+! *inverse* of the assembled mass matrix - confirm against the mesher setup.
+
+ use specfem_par,only: CUSTOM_REAL,NGLOB_CRUST_MANTLE,NGLOB_INNER_CORE,NDIM
+
+#ifdef _HANDOPT
+ use specfem_par,only: imodulo_NGLOB_CRUST_MANTLE4,imodulo_NGLOB_INNER_CORE
+#endif
+
+ implicit none
+
+ ! acceleration & velocity
+ ! crust/mantle region
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE) :: veloc_crust_mantle,accel_crust_mantle
+ ! inner core
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE) :: veloc_inner_core,accel_inner_core
+
+ ! mass matrix
+ real(kind=CUSTOM_REAL), dimension(NGLOB_INNER_CORE) :: rmass_inner_core
+
+ ! deltatover2: half time step of the Newmark scheme;
+ ! two_omega_earth: rotation factor added to the x/y acceleration components
+ real(kind=CUSTOM_REAL) :: deltatover2,two_omega_earth
+
+ ! local parameters
+ integer :: i
+
+ ! Newmark time scheme:
+ !
+ ! note:
+ ! - crust/mantle region
+ ! needs only velocity corrector terms
+ ! (acceleration already updated before)
+ ! - inner core region
+ ! needs both, acceleration update & velocity corrector terms
+
+#ifdef _HANDOPT_NEWMARK
+! way 2:
+! hand-optimized variant: crust/mantle loop unrolled by 4 and inner-core
+! loop unrolled by 3; each remainder (precomputed modulo counts) is
+! handled first so the unrolled loops run on exact multiples
+ ! crust/mantle region
+ if( imodulo_NGLOB_CRUST_MANTLE4 >= 1 ) then
+ do i=1,imodulo_NGLOB_CRUST_MANTLE4
+ veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) + deltatover2*accel_crust_mantle(:,i)
+ enddo
+ endif
+ do i=imodulo_NGLOB_CRUST_MANTLE4+1,NGLOB_CRUST_MANTLE,4
+ veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) + deltatover2*accel_crust_mantle(:,i)
+ veloc_crust_mantle(:,i+1) = veloc_crust_mantle(:,i+1) + deltatover2*accel_crust_mantle(:,i+1)
+ veloc_crust_mantle(:,i+2) = veloc_crust_mantle(:,i+2) + deltatover2*accel_crust_mantle(:,i+2)
+ veloc_crust_mantle(:,i+3) = veloc_crust_mantle(:,i+3) + deltatover2*accel_crust_mantle(:,i+3)
+ enddo
+
+ ! inner core region
+ if(imodulo_NGLOB_INNER_CORE >= 1) then
+ do i=1,imodulo_NGLOB_INNER_CORE
+ accel_inner_core(1,i) = accel_inner_core(1,i)*rmass_inner_core(i) &
+ + two_omega_earth*veloc_inner_core(2,i)
+ accel_inner_core(2,i) = accel_inner_core(2,i)*rmass_inner_core(i) &
+ - two_omega_earth*veloc_inner_core(1,i)
+ accel_inner_core(3,i) = accel_inner_core(3,i)*rmass_inner_core(i)
+
+ veloc_inner_core(:,i) = veloc_inner_core(:,i) + deltatover2*accel_inner_core(:,i)
+ enddo
+ endif
+ do i=imodulo_NGLOB_INNER_CORE+1,NGLOB_INNER_CORE,3
+ accel_inner_core(1,i) = accel_inner_core(1,i)*rmass_inner_core(i) &
+ + two_omega_earth*veloc_inner_core(2,i)
+ accel_inner_core(2,i) = accel_inner_core(2,i)*rmass_inner_core(i) &
+ - two_omega_earth*veloc_inner_core(1,i)
+ accel_inner_core(3,i) = accel_inner_core(3,i)*rmass_inner_core(i)
+
+ veloc_inner_core(:,i) = veloc_inner_core(:,i) + deltatover2*accel_inner_core(:,i)
+
+ accel_inner_core(1,i+1) = accel_inner_core(1,i+1)*rmass_inner_core(i+1) &
+ + two_omega_earth*veloc_inner_core(2,i+1)
+ accel_inner_core(2,i+1) = accel_inner_core(2,i+1)*rmass_inner_core(i+1) &
+ - two_omega_earth*veloc_inner_core(1,i+1)
+ accel_inner_core(3,i+1) = accel_inner_core(3,i+1)*rmass_inner_core(i+1)
+
+ veloc_inner_core(:,i+1) = veloc_inner_core(:,i+1) + deltatover2*accel_inner_core(:,i+1)
+
+ accel_inner_core(1,i+2) = accel_inner_core(1,i+2)*rmass_inner_core(i+2) &
+ + two_omega_earth*veloc_inner_core(2,i+2)
+ accel_inner_core(2,i+2) = accel_inner_core(2,i+2)*rmass_inner_core(i+2) &
+ - two_omega_earth*veloc_inner_core(1,i+2)
+ accel_inner_core(3,i+2) = accel_inner_core(3,i+2)*rmass_inner_core(i+2)
+
+ veloc_inner_core(:,i+2) = veloc_inner_core(:,i+2) + deltatover2*accel_inner_core(:,i+2)
+ enddo
+#else
+! way 1:
+! plain (non-unrolled) version of the same updates
+ ! mantle
+ do i=1,NGLOB_CRUST_MANTLE
+ veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) + deltatover2*accel_crust_mantle(:,i)
+ enddo
+ ! inner core
+ do i=1,NGLOB_INNER_CORE
+ accel_inner_core(1,i) = accel_inner_core(1,i)*rmass_inner_core(i) &
+ + two_omega_earth*veloc_inner_core(2,i)
+ accel_inner_core(2,i) = accel_inner_core(2,i)*rmass_inner_core(i) &
+ - two_omega_earth*veloc_inner_core(1,i)
+ accel_inner_core(3,i) = accel_inner_core(3,i)*rmass_inner_core(i)
+
+ veloc_inner_core(:,i) = veloc_inner_core(:,i) + deltatover2*accel_inner_core(:,i)
+ enddo
+#endif
+
+ end subroutine compute_forces_el_update_veloc
+
+!=====================================================================
+
+ subroutine load_GPU_elastic
+
+! Purpose: copies the current elastic wavefields (displacement, velocity,
+! acceleration) for both the crust/mantle and inner-core regions from the
+! host arrays onto the GPU device referenced by Mesh_pointer.
+! For adjoint/kernel runs (SIMULATION_TYPE == 3) the backward fields (b_*)
+! are transferred as well.
+! NOTE(review): these are full-field host->device copies on every call
+! (see the TODO below) - expensive, intended as a temporary measure.
+
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ implicit none
+
+ ! daniel: TODO - temporary transfers to the GPU
+ call transfer_fields_cm_to_device(NDIM*NGLOB_CRUST_MANTLE,displ_crust_mantle, &
+ veloc_crust_mantle,accel_crust_mantle,Mesh_pointer)
+ call transfer_fields_ic_to_device(NDIM*NGLOB_INNER_CORE,displ_inner_core, &
+ veloc_inner_core,accel_inner_core,Mesh_pointer)
+
+ if( SIMULATION_TYPE == 3 ) then
+ ! backward/reconstructed wavefields for adjoint simulations
+ call transfer_b_fields_cm_to_device(NDIM*NGLOB_CRUST_MANTLE,b_displ_crust_mantle, &
+ b_veloc_crust_mantle,b_accel_crust_mantle,Mesh_pointer)
+ call transfer_b_fields_ic_to_device(NDIM*NGLOB_INNER_CORE,b_displ_inner_core, &
+ b_veloc_inner_core,b_accel_inner_core,Mesh_pointer)
+ endif
+
+ end subroutine
+
+!=====================================================================
+
+ subroutine load_CPU_elastic
+
+! Purpose: mirror of load_GPU_elastic - copies the elastic wavefields
+! (displacement, velocity, acceleration) for the crust/mantle and
+! inner-core regions back from the GPU device referenced by Mesh_pointer
+! into the host arrays.
+! For adjoint/kernel runs (SIMULATION_TYPE == 3) the backward fields (b_*)
+! are transferred as well.
+! NOTE(review): full-field device->host copies on every call (see the TODO
+! below) - expensive, intended as a temporary measure.
+
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ implicit none
+
+ ! daniel: TODO - temporary transfers back to the CPU
+ call transfer_fields_cm_from_device(NDIM*NGLOB_CRUST_MANTLE,displ_crust_mantle, &
+ veloc_crust_mantle,accel_crust_mantle,Mesh_pointer)
+ call transfer_fields_ic_from_device(NDIM*NGLOB_INNER_CORE,displ_inner_core, &
+ veloc_inner_core,accel_inner_core,Mesh_pointer)
+
+ if( SIMULATION_TYPE == 3 ) then
+ ! backward/reconstructed wavefields for adjoint simulations
+ call transfer_b_fields_cm_from_device(NDIM*NGLOB_CRUST_MANTLE,b_displ_crust_mantle, &
+ b_veloc_crust_mantle,b_accel_crust_mantle,Mesh_pointer)
+ call transfer_b_fields_ic_from_device(NDIM*NGLOB_INNER_CORE,b_displ_inner_core, &
+ b_veloc_inner_core,b_accel_inner_core,Mesh_pointer)
+ endif
+
+ end subroutine
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -25,92 +25,91 @@
!
!=====================================================================
- subroutine compute_forces_inner_core(minus_gravity_table,density_table,minus_deriv_gravity_table, &
- displ_inner_core,accel_inner_core,xstore,ystore,zstore, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-!----------------------
- is_on_a_slice_edge_inner_core,icall, &
- accel_crust_mantle,ibool_inner_core,idoubling_inner_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector,iphase, &
- nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
- npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
- receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_INNER_CORE,INCLUDE_CENTRAL_CUBE,iphase_CC, &
-!----------------------
- hprime_xx,hprime_yy,hprime_zz, &
- hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- kappavstore,muvstore,ibool,idoubling, &
- c11store,c33store,c12store,c13store,c44store,R_memory,epsilondev,epsilon_trace_over_3,&
- one_minus_sum_beta,alphaval,betaval,gammaval,factor_common, &
- vx,vy,vz,vnspec)
+ subroutine compute_forces_inner_core( displ_inner_core,accel_inner_core, &
+ phase_is_inner, &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+ epsilondev_xz,epsilondev_yz, &
+ epsilon_trace_over_3,&
+ alphaval,betaval,gammaval,factor_common, &
+ vx,vy,vz,vnspec)
- implicit none
+ use constants
- include "constants.h"
+ use specfem_par,only: &
+ hprime_xx,hprime_yy,hprime_zz,hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
+ minus_gravity_table,density_table,minus_deriv_gravity_table, &
+ myrank,iproc_xi,iproc_eta,ichunk,addressing, &
+ iprocfrom_faces,iprocto_faces, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ npoin2D_max_all_CM_IC,INCLUDE_CENTRAL_CUBE
-! include values created by the mesher
-! done for performance only using static allocation to allow for loop unrolling
- include "OUTPUT_FILES/values_from_mesher.h"
+ use specfem_par_innercore,only: &
+ xstore => xstore_inner_core,ystore => ystore_inner_core,zstore => zstore_inner_core, &
+ xix => xix_inner_core,xiy => xiy_inner_core,xiz => xiz_inner_core, &
+ etax => etax_inner_core,etay => etay_inner_core,etaz => etaz_inner_core, &
+ gammax => gammax_inner_core,gammay => gammay_inner_core,gammaz => gammaz_inner_core, &
+ kappavstore => kappavstore_inner_core, &
+ muvstore => muvstore_inner_core, &
+ c11store => c11store_inner_core,c12store => c12store_inner_core,c13store => c13store_inner_core, &
+ c33store => c33store_inner_core,c44store => c44store_inner_core, &
+ ibool => ibool_inner_core,idoubling => idoubling_inner_core, &
+ one_minus_sum_beta => one_minus_sum_beta_inner_core, &
+ ibool_inner_core,idoubling_inner_core, &
+ iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+ npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+ iboolfaces_inner_core,iboolcorner_inner_core, &
+ nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+ npoin2D_cube_from_slices, &
+ ibool_central_cube, &
+ receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC, &
+ phase_ispec_inner => phase_ispec_inner_inner_core, &
+ nspec_outer => nspec_outer_inner_core, &
+ nspec_inner => nspec_inner_inner_core
-! displacement and acceleration
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE) :: displ_inner_core,accel_inner_core
+ use specfem_par_crustmantle,only: &
+ iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+ npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ iboolfaces_crust_mantle,iboolcorner_crust_mantle
-! for attenuation
-! memory variables R_ij are stored at the local rather than global level
-! to allow for optimization of cache access by compiler
- integer i_SLS,i_memory
- real(kind=CUSTOM_REAL) R_xx_val,R_yy_val
+ implicit none
-! variable lengths for factor_common and one_minus_sum_beta
- integer vx, vy, vz, vnspec
+ ! displacement and acceleration
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE) :: displ_inner_core,accel_inner_core
- real(kind=CUSTOM_REAL), dimension(vx, vy, vz, vnspec) :: one_minus_sum_beta
-
+ ! for attenuation
+ ! memory variables R_ij are stored at the local rather than global level
+ ! to allow for optimization of cache access by compiler
+ ! variable lengths for factor_common and one_minus_sum_beta
+ integer vx, vy, vz, vnspec
real(kind=CUSTOM_REAL), dimension(N_SLS, vx, vy, vz, vnspec) :: factor_common
real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval,betaval,gammaval
- real(kind=CUSTOM_REAL), dimension(NGLLX, NGLLY, NGLLZ) :: factor_common_use
- real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ATTENUATION) :: R_memory
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: epsilondev
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: epsilon_trace_over_3
+! real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ATTENUATION) :: R_memory
+ real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ATTENUATION) :: &
+ R_xx,R_yy,R_xy,R_xz,R_yz
-! array with the local to global mapping per slice
- integer, dimension(NSPEC_INNER_CORE) :: idoubling
-! arrays with mesh parameters per slice
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: ibool
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: xix,xiy,xiz, &
- etax,etay,etaz,gammax,gammay,gammaz
+! real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: epsilondev
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: &
+ epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz
-! array with derivatives of Lagrange polynomials and precalculated products
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprimewgll_xx
- real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLY) :: hprime_yy,hprimewgll_yy
- real(kind=CUSTOM_REAL), dimension(NGLLZ,NGLLZ) :: hprime_zz,hprimewgll_zz
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
- real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: epsilon_trace_over_3
+ ! inner/outer element run flag
+ logical :: phase_is_inner
+
+ ! local parameters
+
+ real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
+ real(kind=CUSTOM_REAL), dimension(NGLLX, NGLLY, NGLLZ) :: factor_common_use
+ real(kind=CUSTOM_REAL) R_xx_val,R_yy_val
+ integer i_SLS
+
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: kappavstore,muvstore
-
-! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: &
-! c11store,c33store,c12store,c13store,c44store
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ANISO_IC) :: &
- c11store,c33store,c12store,c13store,c44store
-
integer ispec,iglob,ispec_strain
integer i,j,k,l
@@ -135,126 +134,43 @@
real(kind=CUSTOM_REAL) tempy1l,tempy2l,tempy3l
real(kind=CUSTOM_REAL) tempz1l,tempz2l,tempz3l
-! for gravity
- integer int_radius
- real(kind=CUSTOM_REAL) sigma_yx,sigma_zx,sigma_zy
+ ! for gravity
double precision radius,rho,minus_g,minus_dg
double precision minus_g_over_radius,minus_dg_plus_g_over_radius
double precision cos_theta,sin_theta,cos_phi,sin_phi
double precision cos_theta_sq,sin_theta_sq,cos_phi_sq,sin_phi_sq
double precision theta,phi,factor,gxl,gyl,gzl,sx_l,sy_l,sz_l
double precision Hxxl,Hyyl,Hzzl,Hxyl,Hxzl,Hyzl
- double precision, dimension(NRAD_GRAVITY) :: minus_gravity_table,density_table,minus_deriv_gravity_table
real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: rho_s_H
- double precision, dimension(NGLLX,NGLLY,NGLLZ) :: wgll_cube
- real(kind=CUSTOM_REAL), dimension(NGLOB_INNER_CORE) :: xstore,ystore,zstore
+ real(kind=CUSTOM_REAL) sigma_yx,sigma_zx,sigma_zy
+ integer :: int_radius
-! this for non blocking MPI
- integer :: iphase,icall
+! integer :: computed_elements
+ integer :: num_elements,ispec_p
+ integer :: iphase
- integer :: computed_elements
-
- logical, dimension(NSPEC_INNER_CORE) :: is_on_a_slice_edge_inner_core
-
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE) :: accel_crust_mantle
-
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: ibool_inner_core
-
- integer, dimension(NSPEC_INNER_CORE) :: idoubling_inner_core
-
- integer :: ichunk,iproc_xi,iproc_eta,myrank
-
- integer, dimension(NCHUNKS_VAL,0:NPROC_XI_VAL-1,0:NPROC_ETA_VAL-1) :: addressing
-
- integer, dimension(NGLOB2DMAX_XMIN_XMAX_CM) :: iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle
- integer, dimension(NGLOB2DMAX_YMIN_YMAX_CM) :: iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle
-
- integer, dimension(NGLOB2DMAX_XMIN_XMAX_IC) :: iboolleft_xi_inner_core,iboolright_xi_inner_core
- integer, dimension(NGLOB2DMAX_YMIN_YMAX_IC) :: iboolleft_eta_inner_core,iboolright_eta_inner_core
-
- integer npoin2D_faces_crust_mantle(NUMFACES_SHARED)
- integer npoin2D_faces_inner_core(NUMFACES_SHARED)
-
- integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- npoin2D_xi_inner_core,npoin2D_eta_inner_core
-
-! communication pattern for faces between chunks
- integer, dimension(NUMMSGS_FACES_VAL) :: iprocfrom_faces,iprocto_faces
-
-! communication pattern for corners between chunks
- integer, dimension(NCORNERSCHUNKS_VAL) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
-
- integer, dimension(NGLOB1D_RADIAL_CM,NUMCORNERS_SHARED) :: iboolcorner_crust_mantle
- integer, dimension(NGLOB1D_RADIAL_IC,NUMCORNERS_SHARED) :: iboolcorner_inner_core
-
- integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED) :: iboolfaces_crust_mantle
- integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED) :: iboolfaces_inner_core
-
- integer :: npoin2D_max_all_CM_IC
- real(kind=CUSTOM_REAL), dimension(NDIM,npoin2D_max_all_CM_IC) :: buffer_send_faces,buffer_received_faces
-
-! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB1D_RADIAL_CM + NGLOB1D_RADIAL_IC) :: &
- buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector
-
-! for matching with central cube in inner core
- integer nb_msgs_theor_in_cube, npoin2D_cube_from_slices,iphase_CC
- integer, dimension(nb_msgs_theor_in_cube) :: sender_from_slices_to_cube
- double precision, dimension(npoin2D_cube_from_slices,NDIM) :: buffer_slices
- double precision, dimension(npoin2D_cube_from_slices,NDIM,nb_msgs_theor_in_cube) :: buffer_all_cube_from_slices
- integer, dimension(nb_msgs_theor_in_cube,npoin2D_cube_from_slices):: ibool_central_cube
- integer receiver_cube_from_slices
- logical :: INCLUDE_CENTRAL_CUBE
-
-! local to global mapping
- integer NSPEC2D_BOTTOM_INNER_CORE
- integer, dimension(NSPEC2D_BOTTOM_INNER_CORE) :: ibelm_bottom_inner_core
-
! ****************************************************
! big loop over all spectral elements in the solid
! ****************************************************
- computed_elements = 0
+! computed_elements = 0
+ if( .not. phase_is_inner ) then
+ iphase = 1
+ num_elements = nspec_outer
+ else
+ iphase = 2
+ num_elements = nspec_inner
+ endif
- do ispec = 1,NSPEC_INNER_CORE
+ do ispec_p = 1,num_elements
-! hide communications by computing the edges first
- if((icall == 2 .and. is_on_a_slice_edge_inner_core(ispec)) .or. &
- (icall == 1 .and. .not. is_on_a_slice_edge_inner_core(ispec))) cycle
+ ispec = phase_ispec_inner(ispec_p,iphase)
-! exclude fictitious elements in central cube
+ ! only compute element which belong to current phase (inner or outer elements)
+
+ ! exclude fictitious elements in central cube
if(idoubling(ispec) /= IFLAG_IN_FICTITIOUS_CUBE) then
-! process the communications every ELEMENTS_NONBLOCKING elements
- computed_elements = computed_elements + 1
- if (USE_NONBLOCKING_COMMS .and. icall == 2 .and. mod(computed_elements,ELEMENTS_NONBLOCKING_CM_IC) == 0) then
-
- if(iphase <= 7) call assemble_MPI_vector(myrank,accel_crust_mantle,accel_inner_core, &
- iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector, &
- NUMMSGS_FACES_VAL,NCORNERSCHUNKS_VAL, &
- NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL_CM, &
- NGLOB1D_RADIAL_IC,NCHUNKS_VAL,iphase)
-
- if(INCLUDE_CENTRAL_CUBE) then
- if(iphase > 7 .and. iphase_CC <= 4) &
- call assemble_MPI_central_cube(ichunk,nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
- npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
- receiver_cube_from_slices,ibool_inner_core,idoubling_inner_core, &
- ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,accel_inner_core,NDIM,iphase_CC)
- endif
-
- endif
-
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
@@ -422,14 +338,14 @@
! subtract memory variables if attenuation
if(ATTENUATION_VAL .and. ( USE_ATTENUATION_MIMIC .eqv. .false. ) ) then
do i_SLS = 1,N_SLS
- R_xx_val = R_memory(1,i_SLS,i,j,k,ispec)
- R_yy_val = R_memory(2,i_SLS,i,j,k,ispec)
+ R_xx_val = R_xx(i_SLS,i,j,k,ispec)
+ R_yy_val = R_yy(i_SLS,i,j,k,ispec)
sigma_xx = sigma_xx - R_xx_val
sigma_yy = sigma_yy - R_yy_val
sigma_zz = sigma_zz + R_xx_val + R_yy_val
- sigma_xy = sigma_xy - R_memory(3,i_SLS,i,j,k,ispec)
- sigma_xz = sigma_xz - R_memory(4,i_SLS,i,j,k,ispec)
- sigma_yz = sigma_yz - R_memory(5,i_SLS,i,j,k,ispec)
+ sigma_xy = sigma_xy - R_xy(i_SLS,i,j,k,ispec)
+ sigma_xz = sigma_xz - R_xz(i_SLS,i,j,k,ispec)
+ sigma_yz = sigma_yz - R_yz(i_SLS,i,j,k,ispec)
enddo
endif
@@ -649,14 +565,36 @@
do i_SLS = 1,N_SLS
factor_common_use = factor_common(i_SLS,:,:,:,ispec)
- do i_memory = 1,5
- R_memory(i_memory,i_SLS,:,:,:,ispec) = &
- alphaval(i_SLS) * &
- R_memory(i_memory,i_SLS,:,:,:,ispec) + muvstore(:,:,:,ispec) * &
- factor_common_use * &
- (betaval(i_SLS) * &
- epsilondev(i_memory,:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(i_memory,:,:,:))
- enddo
+! do i_memory = 1,5
+! R_memory(i_memory,i_SLS,:,:,:,ispec) = &
+! alphaval(i_SLS) * &
+! R_memory(i_memory,i_SLS,:,:,:,ispec) + muvstore(:,:,:,ispec) * &
+! factor_common_use * &
+! (betaval(i_SLS) * &
+! epsilondev(i_memory,:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(i_memory,:,:,:))
+! enddo
+
+ R_xx(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_xx(i_SLS,:,:,:,ispec) &
+ + muvstore(:,:,:,ispec) * factor_common_use * &
+ (betaval(i_SLS) * epsilondev_xx(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(1,:,:,:))
+
+ R_yy(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_yy(i_SLS,:,:,:,ispec) &
+ + muvstore(:,:,:,ispec) * factor_common_use * &
+ (betaval(i_SLS) * epsilondev_yy(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(2,:,:,:))
+
+ R_xy(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_xy(i_SLS,:,:,:,ispec) &
+ + muvstore(:,:,:,ispec) * factor_common_use * &
+ (betaval(i_SLS) * epsilondev_xy(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(3,:,:,:))
+
+ R_xz(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_xz(i_SLS,:,:,:,ispec) &
+ + muvstore(:,:,:,ispec) * factor_common_use * &
+ (betaval(i_SLS) * epsilondev_xz(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(4,:,:,:))
+
+ R_yz(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_yz(i_SLS,:,:,:,ispec) &
+ + muvstore(:,:,:,ispec) * factor_common_use * &
+ (betaval(i_SLS) * epsilondev_yz(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(5,:,:,:))
+
+
enddo
endif
@@ -667,7 +605,11 @@
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
- epsilondev(:,i,j,k,ispec) = epsilondev_loc(:,i,j,k)
+ epsilondev_xx(i,j,k,ispec) = epsilondev_loc(1,i,j,k)
+ epsilondev_yy(i,j,k,ispec) = epsilondev_loc(2,i,j,k)
+ epsilondev_xy(i,j,k,ispec) = epsilondev_loc(3,i,j,k)
+ epsilondev_xz(i,j,k,ispec) = epsilondev_loc(4,i,j,k)
+ epsilondev_yz(i,j,k,ispec) = epsilondev_loc(5,i,j,k)
enddo
enddo
enddo
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core_Dev.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core_Dev.F90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core_Dev.F90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -30,99 +30,88 @@
! or compile with: -D_HANDOPT
!#define _HANDOPT
-! BEWARE:
-! BEWARE: we have observed that using _HANDOPT in combination with -O3 or higher can lead to problems on some machines;
-! BEWARE: thus, be careful when using it. At the very least, run the same test simulation once with _HANDOPT and once without
-! BEWARE: and make sure that all the seismograms you get are the same down to roundoff noise.
-! BEWARE:
-
! note: these hand optimizations should help compilers to pipeline the code and make better use of the cache;
! depending on compilers, it can further decrease the computation time by ~ 30%.
! the original routines are commented with "! way 1", the hand-optimized routines with "! way 2"
- subroutine compute_forces_inner_core_Dev(minus_gravity_table,density_table,minus_deriv_gravity_table, &
- displ_inner_core,accel_inner_core,xstore,ystore,zstore, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-!----------------------
- is_on_a_slice_edge_inner_core,icall, &
- accel_crust_mantle,ibool_inner_core,idoubling_inner_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector,iphase, &
- nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
- npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
- receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_INNER_CORE,INCLUDE_CENTRAL_CUBE,iphase_CC, &
-!----------------------
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- kappavstore,muvstore,ibool,idoubling, &
- c11store,c33store,c12store,c13store,c44store,R_memory,epsilondev,epsilon_trace_over_3,&
- one_minus_sum_beta,alphaval,betaval,gammaval,factor_common, &
- vx,vy,vz,vnspec)
+ subroutine compute_forces_inner_core_Dev( displ_inner_core,accel_inner_core, &
+ phase_is_inner, &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+ epsilondev_xz,epsilondev_yz, &
+ epsilon_trace_over_3,&
+ alphaval,betaval,gammaval,factor_common, &
+ vx,vy,vz,vnspec)
! this routine is optimized for NGLLX = NGLLY = NGLLZ = 5 using the Deville et al. (2002) inlined matrix-matrix products
- implicit none
+ use constants
- include "constants.h"
+ use specfem_par,only: &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT,wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
+ minus_gravity_table,density_table,minus_deriv_gravity_table, &
+ myrank,iproc_xi,iproc_eta,ichunk,addressing, &
+ iprocfrom_faces,iprocto_faces, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ npoin2D_max_all_CM_IC,INCLUDE_CENTRAL_CUBE
-! include values created by the mesher
-! done for performance only using static allocation to allow for loop unrolling
- include "OUTPUT_FILES/values_from_mesher.h"
+ use specfem_par_innercore,only: &
+ xstore => xstore_inner_core,ystore => ystore_inner_core,zstore => zstore_inner_core, &
+ xix => xix_inner_core,xiy => xiy_inner_core,xiz => xiz_inner_core, &
+ etax => etax_inner_core,etay => etay_inner_core,etaz => etaz_inner_core, &
+ gammax => gammax_inner_core,gammay => gammay_inner_core,gammaz => gammaz_inner_core, &
+ kappavstore => kappavstore_inner_core, &
+ muvstore => muvstore_inner_core, &
+ c11store => c11store_inner_core,c12store => c12store_inner_core,c13store => c13store_inner_core, &
+ c33store => c33store_inner_core,c44store => c44store_inner_core, &
+ ibool => ibool_inner_core,idoubling => idoubling_inner_core, &
+ one_minus_sum_beta => one_minus_sum_beta_inner_core, &
+ ibool_inner_core,idoubling_inner_core, &
+ iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+ npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+ iboolfaces_inner_core,iboolcorner_inner_core, &
+ nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+ npoin2D_cube_from_slices, &
+ ibool_central_cube, &
+ receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC, &
+ phase_ispec_inner => phase_ispec_inner_inner_core, &
+ nspec_outer => nspec_outer_inner_core, &
+ nspec_inner => nspec_inner_inner_core
- ! displacement and acceleration
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE) :: displ_inner_core,accel_inner_core
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: ibool
- real(kind=CUSTOM_REAL), dimension(NGLOB_INNER_CORE) :: xstore,ystore,zstore
+ use specfem_par_crustmantle,only: &
+ iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+ npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ iboolfaces_crust_mantle,iboolcorner_crust_mantle
- ! arrays with mesh parameters per slice
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: xix,xiy,xiz, &
- etax,etay,etaz,gammax,gammay,gammaz
+ implicit none
+ ! displacement and acceleration
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE) :: displ_inner_core,accel_inner_core
+
! for attenuation
! memory variables R_ij are stored at the local rather than global level
! to allow for optimization of cache access by compiler
! variable lengths for factor_common and one_minus_sum_beta
integer vx, vy, vz, vnspec
real(kind=CUSTOM_REAL), dimension(N_SLS, vx, vy, vz, vnspec) :: factor_common
- real(kind=CUSTOM_REAL), dimension(vx, vy, vz, vnspec) :: one_minus_sum_beta
-
real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval,betaval,gammaval
- real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ATTENUATION) :: R_memory
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: epsilondev
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: epsilon_trace_over_3
+! real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ATTENUATION) :: R_memory
+ real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ATTENUATION) :: &
+ R_xx,R_yy,R_xy,R_xz,R_yz
- ! array with derivatives of Lagrange polynomials and precalculated products
- double precision, dimension(NGLLX,NGLLY,NGLLZ) :: wgll_cube
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprimewgll_xx
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xxT,hprimewgll_xxT
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
- real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
+! real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: epsilondev
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: &
+ epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: kappavstore,muvstore
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: epsilon_trace_over_3
-! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: &
-! c11store,c33store,c12store,c13store,c44store
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ANISO_IC) :: &
- c11store,c33store,c12store,c13store,c44store
+ ! inner/outer element run flag
+ logical :: phase_is_inner
- ! array with the local to global mapping per slice
- integer, dimension(NSPEC_INNER_CORE) :: idoubling
+ ! local parameters
- double precision, dimension(NRAD_GRAVITY) :: minus_gravity_table,density_table,minus_deriv_gravity_table
-
-! local parameters
! Deville
! manually inline the calls to the Deville et al. (2002) routines
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
@@ -192,68 +181,10 @@
integer :: ispec,ispec_strain
integer :: i,j,k
integer :: iglob1
+! integer :: computed_elements
+ integer :: num_elements,ispec_p
+ integer :: iphase
-! this for non blocking MPI
- integer :: iphase,icall
-
- integer :: computed_elements
-
- logical, dimension(NSPEC_INNER_CORE) :: is_on_a_slice_edge_inner_core
-
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE) :: accel_crust_mantle
-
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: ibool_inner_core
-
- integer, dimension(NSPEC_INNER_CORE) :: idoubling_inner_core
-
- integer :: ichunk,iproc_xi,iproc_eta,myrank
-
- integer, dimension(NCHUNKS_VAL,0:NPROC_XI_VAL-1,0:NPROC_ETA_VAL-1) :: addressing
-
- integer, dimension(NGLOB2DMAX_XMIN_XMAX_CM) :: iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle
- integer, dimension(NGLOB2DMAX_YMIN_YMAX_CM) :: iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle
-
- integer, dimension(NGLOB2DMAX_XMIN_XMAX_IC) :: iboolleft_xi_inner_core,iboolright_xi_inner_core
- integer, dimension(NGLOB2DMAX_YMIN_YMAX_IC) :: iboolleft_eta_inner_core,iboolright_eta_inner_core
-
- integer npoin2D_faces_crust_mantle(NUMFACES_SHARED)
- integer npoin2D_faces_inner_core(NUMFACES_SHARED)
-
- integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- npoin2D_xi_inner_core,npoin2D_eta_inner_core
-
-! communication pattern for faces between chunks
- integer, dimension(NUMMSGS_FACES_VAL) :: iprocfrom_faces,iprocto_faces
-
-! communication pattern for corners between chunks
- integer, dimension(NCORNERSCHUNKS_VAL) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
-
- integer, dimension(NGLOB1D_RADIAL_CM,NUMCORNERS_SHARED) :: iboolcorner_crust_mantle
- integer, dimension(NGLOB1D_RADIAL_IC,NUMCORNERS_SHARED) :: iboolcorner_inner_core
-
- integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED) :: iboolfaces_crust_mantle
- integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED) :: iboolfaces_inner_core
-
- integer :: npoin2D_max_all_CM_IC
- real(kind=CUSTOM_REAL), dimension(NDIM,npoin2D_max_all_CM_IC) :: buffer_send_faces,buffer_received_faces
-
-! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB1D_RADIAL_CM + NGLOB1D_RADIAL_IC) :: &
- buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector
-
-! for matching with central cube in inner core
- integer nb_msgs_theor_in_cube, npoin2D_cube_from_slices,iphase_CC
- integer, dimension(nb_msgs_theor_in_cube) :: sender_from_slices_to_cube
- double precision, dimension(npoin2D_cube_from_slices,NDIM) :: buffer_slices
- double precision, dimension(npoin2D_cube_from_slices,NDIM,nb_msgs_theor_in_cube) :: buffer_all_cube_from_slices
- integer, dimension(nb_msgs_theor_in_cube,npoin2D_cube_from_slices):: ibool_central_cube
- integer receiver_cube_from_slices
- logical :: INCLUDE_CENTRAL_CUBE
-
-! local to global mapping
- integer NSPEC2D_BOTTOM_INNER_CORE
- integer, dimension(NSPEC2D_BOTTOM_INNER_CORE) :: ibelm_bottom_inner_core
-
#ifdef _HANDOPT
integer, dimension(5) :: iglobv5
#endif
@@ -262,47 +193,24 @@
! big loop over all spectral elements in the solid
! ****************************************************
- computed_elements = 0
+! computed_elements = 0
+ if( .not. phase_is_inner ) then
+ iphase = 1
+ num_elements = nspec_outer
+ else
+ iphase = 2
+ num_elements = nspec_inner
+ endif
- do ispec = 1,NSPEC_INNER_CORE
+ do ispec_p = 1,num_elements
-! hide communications by computing the edges first
- if((icall == 2 .and. is_on_a_slice_edge_inner_core(ispec)) .or. &
- (icall == 1 .and. .not. is_on_a_slice_edge_inner_core(ispec))) cycle
+ ispec = phase_ispec_inner(ispec_p,iphase)
+ ! only compute element which belong to current phase (inner or outer elements)
+
! exclude fictitious elements in central cube
if(idoubling(ispec) /= IFLAG_IN_FICTITIOUS_CUBE) then
-! process the communications every ELEMENTS_NONBLOCKING elements
- computed_elements = computed_elements + 1
- if (USE_NONBLOCKING_COMMS .and. icall == 2 .and. mod(computed_elements,ELEMENTS_NONBLOCKING_CM_IC) == 0) then
-
- if(iphase <= 7) call assemble_MPI_vector(myrank,accel_crust_mantle,accel_inner_core, &
- iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector, &
- NUMMSGS_FACES_VAL,NCORNERSCHUNKS_VAL, &
- NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL_CM, &
- NGLOB1D_RADIAL_IC,NCHUNKS_VAL,iphase)
-
- if(INCLUDE_CENTRAL_CUBE) then
- if(iphase > 7 .and. iphase_CC <= 4) &
- call assemble_MPI_central_cube(ichunk,nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
- npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
- receiver_cube_from_slices,ibool_inner_core,idoubling_inner_core, &
- ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,accel_inner_core,NDIM,iphase_CC)
- endif
-
- endif
-
! subroutines adapted from Deville, Fischer and Mund, High-order methods
! for incompressible fluid flow, Cambridge University Press (2002),
! pages 386 and 389 and Figure 8.3.1
@@ -542,8 +450,12 @@
if(ATTENUATION_VAL .and. ( USE_ATTENUATION_MIMIC .eqv. .false. ) ) then
! note: fortran passes pointers to array location, thus R_memory(1,1,...) should be fine
- call compute_element_att_stress( R_memory(1,1,i,j,k,ispec), &
- sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz)
+ call compute_element_att_stress(R_xx(1,i,j,k,ispec), &
+ R_yy(1,i,j,k,ispec), &
+ R_xy(1,i,j,k,ispec), &
+ R_xz(1,i,j,k,ispec), &
+ R_yz(1,i,j,k,ispec), &
+ sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz)
endif
@@ -808,17 +720,23 @@
if(ATTENUATION_VAL .and. ( USE_ATTENUATION_MIMIC .eqv. .false. ) ) then
! updates R_memory
- call compute_element_att_memory_ic(ispec,R_memory, &
+ call compute_element_att_memory_ic(ispec,R_xx,R_yy,R_xy,R_xz,R_yz, &
vx,vy,vz,vnspec,factor_common, &
alphaval,betaval,gammaval, &
muvstore, &
- epsilondev,epsilondev_loc)
+ epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+ epsilondev_xz,epsilondev_yz, &
+ epsilondev_loc)
endif
! save deviatoric strain for Runge-Kutta scheme
if(COMPUTE_AND_STORE_STRAIN) then
- epsilondev(:,:,:,:,ispec) = epsilondev_loc(:,:,:,:)
+ epsilondev_xx(:,:,:,ispec) = epsilondev_loc(1,:,:,:)
+ epsilondev_yy(:,:,:,ispec) = epsilondev_loc(2,:,:,:)
+ epsilondev_xy(:,:,:,ispec) = epsilondev_loc(3,:,:,:)
+ epsilondev_xz(:,:,:,ispec) = epsilondev_loc(4,:,:,:)
+ epsilondev_yz(:,:,:,ispec) = epsilondev_loc(5,:,:,:)
endif
endif ! end test to exclude fictitious elements in central cube
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -26,72 +26,60 @@
!=====================================================================
subroutine compute_forces_outer_core(time,deltat,two_omega_earth, &
- A_array_rotation,B_array_rotation, &
- d_ln_density_dr_table, &
- minus_rho_g_over_kappa_fluid,displfluid,accelfluid, &
- div_displfluid, &
- xstore,ystore,zstore, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- is_on_a_slice_edge_outer_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
- npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
- iboolfaces_outer_core,iboolcorner_outer_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar,iphase,icall, &
- hprime_xx,hprime_yy,hprime_zz, &
- hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- ibool,MOVIE_VOLUME)
+ A_array_rotation,B_array_rotation, &
+ displfluid,accelfluid,div_displfluid, &
+ phase_is_inner)
- implicit none
+ use constants
- include "constants.h"
+ use specfem_par,only: &
+ hprime_xx,hprime_yy,hprime_zz,hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
+ minus_rho_g_over_kappa_fluid,d_ln_density_dr_table, &
+ MOVIE_VOLUME, &
+ myrank,iproc_xi,iproc_eta,ichunk,addressing, &
+ iprocfrom_faces,iprocto_faces, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ npoin2D_max_all_CM_IC
-! include values created by the mesher
-! done for performance only using static allocation to allow for loop unrolling
- include "OUTPUT_FILES/values_from_mesher.h"
+ use specfem_par_outercore,only: &
+ xstore => xstore_outer_core,ystore => ystore_outer_core,zstore => zstore_outer_core, &
+ xix => xix_outer_core,xiy => xiy_outer_core,xiz => xiz_outer_core, &
+ etax => etax_outer_core,etay => etay_outer_core,etaz => etaz_outer_core, &
+ gammax => gammax_outer_core,gammay => gammay_outer_core,gammaz => gammaz_outer_core, &
+ ibool => ibool_outer_core, &
+ iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
+ npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
+ iboolfaces_outer_core,iboolcorner_outer_core, &
+ phase_ispec_inner => phase_ispec_inner_outer_core, &
+ nspec_outer => nspec_outer_outer_core, &
+ nspec_inner => nspec_inner_outer_core
-! displacement and acceleration
+ implicit none
+
+ ! for the Euler scheme for rotation
+ real(kind=CUSTOM_REAL) time,deltat,two_omega_earth
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ROTATION) :: &
+ A_array_rotation,B_array_rotation
+
+ ! displacement and acceleration
real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE) :: displfluid,accelfluid
-! divergence of displacement
+ ! divergence of displacement
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ADJOINT) :: div_displfluid
-! arrays with mesh parameters per slice
- integer, dimension(NGLLX,NGLLY,NGLLZ,nspec_outer_core) :: ibool
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec_outer_core) :: xix,xiy,xiz, &
- etax,etay,etaz,gammax,gammay,gammaz
+ ! inner/outer element run flag
+ logical :: phase_is_inner
-! array with derivatives of Lagrange polynomials and precalculated products
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprimewgll_xx
- real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLY) :: hprime_yy,hprimewgll_yy
- real(kind=CUSTOM_REAL), dimension(NGLLZ,NGLLZ) :: hprime_zz,hprimewgll_zz
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
- real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
- double precision, dimension(NGLLX,NGLLY,NGLLZ) :: wgll_cube
+ ! local parameters
- logical MOVIE_VOLUME
-
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: tempx1,tempx2,tempx3
-
-! for gravity
+ ! for gravity
integer int_radius
double precision radius,theta,phi,gxl,gyl,gzl
double precision cos_theta,sin_theta,cos_phi,sin_phi
- double precision, dimension(NRAD_GRAVITY) :: minus_rho_g_over_kappa_fluid
- double precision, dimension(NRAD_GRAVITY) :: d_ln_density_dr_table
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: gravity_term
- real(kind=CUSTOM_REAL), dimension(nglob_outer_core) :: xstore,ystore,zstore
-
-! for the Euler scheme for rotation
- real(kind=CUSTOM_REAL) time,deltat,two_omega_earth
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ROTATION) :: &
- A_array_rotation,B_array_rotation
-
+ ! for the Euler scheme for rotation
real(kind=CUSTOM_REAL) two_omega_deltat,cos_two_omega_t,sin_two_omega_t,A_rotation,B_rotation, &
ux_rotation,uy_rotation,dpotentialdx_with_rot,dpotentialdy_with_rot
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: source_euler_A,source_euler_B
@@ -105,74 +93,30 @@
double precision grad_x_ln_rho,grad_y_ln_rho,grad_z_ln_rho
-! this for non blocking MPI
- integer :: ichunk,iproc_xi,iproc_eta,myrank
+! integer :: computed_elements
+ integer :: num_elements,ispec_p
+ integer :: iphase
- integer, dimension(NCHUNKS_VAL,0:NPROC_XI_VAL-1,0:NPROC_ETA_VAL-1) :: addressing
-
- integer, dimension(NGLOB2DMAX_XMIN_XMAX_OC) :: iboolleft_xi_outer_core,iboolright_xi_outer_core
- integer, dimension(NGLOB2DMAX_YMIN_YMAX_OC) :: iboolleft_eta_outer_core,iboolright_eta_outer_core
-
- integer npoin2D_faces_outer_core(NUMFACES_SHARED)
- integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_outer_core,npoin2D_eta_outer_core
-
-! communication pattern for faces between chunks
- integer, dimension(NUMMSGS_FACES_VAL) :: iprocfrom_faces,iprocto_faces
-
-! communication pattern for corners between chunks
- integer, dimension(NCORNERSCHUNKS_VAL) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
-
-! indirect addressing for each message for faces and corners of the chunks
-! a given slice can belong to at most one corner and at most two faces
- integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED) :: iboolfaces_outer_core
-
-! buffers for send and receive between faces of the slices and the chunks
-! we use the same buffers to assemble scalars and vectors because vectors are
-! always three times bigger and therefore scalars can use the first part
-! of the vector buffer in memory even if it has an additional index here
-! allocate these automatic arrays in the memory stack to avoid memory fragmentation with "allocate()"
- integer :: npoin2D_max_all_CM_IC
- real(kind=CUSTOM_REAL), dimension(NDIM,npoin2D_max_all_CM_IC,NUMFACES_SHARED) :: buffer_send_faces,buffer_received_faces
-
- integer, dimension(NGLOB1D_RADIAL_OC,NUMCORNERS_SHARED) :: iboolcorner_outer_core
-
- real(kind=CUSTOM_REAL), dimension(NGLOB1D_RADIAL_OC) :: buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar
-
- logical, dimension(NSPEC_OUTER_CORE) :: is_on_a_slice_edge_outer_core
-
- integer :: iphase,icall
-
- integer :: computed_elements
-
! ****************************************************
! big loop over all spectral elements in the fluid
! ****************************************************
- if (NSPEC_OUTER_CORE_ADJOINT /= 1 .and. icall == 1) div_displfluid(:,:,:,:) = 0._CUSTOM_REAL
+ if (NSPEC_OUTER_CORE_ADJOINT /= 1 .and. ( .not. phase_is_inner )) div_displfluid(:,:,:,:) = 0._CUSTOM_REAL
- computed_elements = 0
+! computed_elements = 0
+ if( .not. phase_is_inner ) then
+ iphase = 1
+ num_elements = nspec_outer
+ else
+ iphase = 2
+ num_elements = nspec_inner
+ endif
- do ispec = 1,NSPEC_OUTER_CORE
+ do ispec_p = 1,num_elements
-! hide communications by computing the edges first
- if((icall == 2 .and. is_on_a_slice_edge_outer_core(ispec)) .or. &
- (icall == 1 .and. .not. is_on_a_slice_edge_outer_core(ispec))) cycle
+ ispec = phase_ispec_inner(ispec_p,iphase)
-! process the communications every ELEMENTS_NONBLOCKING elements
- computed_elements = computed_elements + 1
- if (USE_NONBLOCKING_COMMS .and. icall == 2 .and. mod(computed_elements,ELEMENTS_NONBLOCKING_OC) == 0 .and. iphase <= 7) &
- call assemble_MPI_scalar(myrank,accelfluid,NGLOB_OUTER_CORE, &
- iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
- npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
- iboolfaces_outer_core,iboolcorner_outer_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
- NUMMSGS_FACES_VAL,NCORNERSCHUNKS_VAL, &
- NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL_OC, &
- NGLOB2DMAX_XMIN_XMAX_OC,NGLOB2DMAX_YMIN_YMAX_OC,NGLOB2DMAX_XY_VAL,NCHUNKS_VAL,iphase)
+ ! only compute element which belong to current phase (inner or outer elements)
do k=1,NGLLZ
do j=1,NGLLY
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core_Dev.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core_Dev.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core_Dev.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -26,81 +26,69 @@
!=====================================================================
subroutine compute_forces_outer_core_Dev(time,deltat,two_omega_earth, &
- A_array_rotation,B_array_rotation, &
- d_ln_density_dr_table, &
- minus_rho_g_over_kappa_fluid,displfluid,accelfluid, &
- div_displfluid, &
- xstore,ystore,zstore, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- is_on_a_slice_edge_outer_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
- npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
- iboolfaces_outer_core,iboolcorner_outer_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar,iphase,icall, &
- hprime_xx,hprime_xxT, &
- hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- ibool,MOVIE_VOLUME)
+ A_array_rotation,B_array_rotation, &
+ displfluid,accelfluid,div_displfluid, &
+ phase_is_inner)
! this routine is optimized for NGLLX = NGLLY = NGLLZ = 5 using the Deville et al. (2002) inlined matrix-matrix products
- implicit none
+ use constants
- include "constants.h"
+ use specfem_par,only: &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT,wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
+ minus_rho_g_over_kappa_fluid,d_ln_density_dr_table, &
+ MOVIE_VOLUME, &
+ myrank,iproc_xi,iproc_eta,ichunk,addressing, &
+ iprocfrom_faces,iprocto_faces, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ npoin2D_max_all_CM_IC
-! include values created by the mesher
-! done for performance only using static allocation to allow for loop unrolling
- include "OUTPUT_FILES/values_from_mesher.h"
+ use specfem_par_outercore,only: &
+ xstore => xstore_outer_core,ystore => ystore_outer_core,zstore => zstore_outer_core, &
+ xix => xix_outer_core,xiy => xiy_outer_core,xiz => xiz_outer_core, &
+ etax => etax_outer_core,etay => etay_outer_core,etaz => etaz_outer_core, &
+ gammax => gammax_outer_core,gammay => gammay_outer_core,gammaz => gammaz_outer_core, &
+ ibool => ibool_outer_core, &
+ iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
+ npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
+ iboolfaces_outer_core,iboolcorner_outer_core, &
+ phase_ispec_inner => phase_ispec_inner_outer_core, &
+ nspec_outer => nspec_outer_outer_core, &
+ nspec_inner => nspec_inner_outer_core
-! displacement and acceleration
+
+ implicit none
+
+ ! for the Euler scheme for rotation
+ real(kind=CUSTOM_REAL) time,deltat,two_omega_earth
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ROTATION) :: &
+ A_array_rotation,B_array_rotation
+
+ ! displacement and acceleration
real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE) :: displfluid,accelfluid
-! divergence of displacement
+ ! divergence of displacement
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ADJOINT) :: div_displfluid
-! arrays with mesh parameters per slice
- integer, dimension(NGLLX,NGLLY,NGLLZ,nspec_outer_core) :: ibool
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec_outer_core) :: xix,xiy,xiz, &
- etax,etay,etaz,gammax,gammay,gammaz
+ ! inner/outer element run flag
+ logical :: phase_is_inner
-! array with derivatives of Lagrange polynomials and precalculated products
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprimewgll_xx
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xxT,hprimewgll_xxT
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
- real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
- double precision, dimension(NGLLX,NGLLY,NGLLZ) :: wgll_cube
+ ! local parameters
- logical MOVIE_VOLUME
-
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: tempx1,tempx2,tempx3
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: newtempx1,newtempx2,newtempx3
-
-! for gravity
+ ! for gravity
integer int_radius
double precision radius,theta,phi,gxl,gyl,gzl
double precision cos_theta,sin_theta,cos_phi,sin_phi
- double precision, dimension(NRAD_GRAVITY) :: minus_rho_g_over_kappa_fluid
- double precision, dimension(NRAD_GRAVITY) :: d_ln_density_dr_table
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: gravity_term
- real(kind=CUSTOM_REAL), dimension(nglob_outer_core) :: xstore,ystore,zstore
-
-! for the Euler scheme for rotation
- real(kind=CUSTOM_REAL) time,deltat,two_omega_earth
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ROTATION) :: &
- A_array_rotation,B_array_rotation
-
+ ! for the Euler scheme for rotation
real(kind=CUSTOM_REAL) two_omega_deltat,cos_two_omega_t,sin_two_omega_t,A_rotation,B_rotation, &
ux_rotation,uy_rotation,dpotentialdx_with_rot,dpotentialdy_with_rot
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: source_euler_A,source_euler_B
integer ispec,iglob
integer i,j,k
-
real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
real(kind=CUSTOM_REAL) dpotentialdxl,dpotentialdyl,dpotentialdzl
real(kind=CUSTOM_REAL) sum_terms
@@ -128,74 +116,30 @@
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
displ_times_grad_x_ln_rho,displ_times_grad_y_ln_rho,displ_times_grad_z_ln_rho
-! this for non blocking MPI
- integer :: ichunk,iproc_xi,iproc_eta,myrank
+! integer :: computed_elements
+ integer :: num_elements,ispec_p
+ integer :: iphase
- integer, dimension(NCHUNKS_VAL,0:NPROC_XI_VAL-1,0:NPROC_ETA_VAL-1) :: addressing
-
- integer, dimension(NGLOB2DMAX_XMIN_XMAX_OC) :: iboolleft_xi_outer_core,iboolright_xi_outer_core
- integer, dimension(NGLOB2DMAX_YMIN_YMAX_OC) :: iboolleft_eta_outer_core,iboolright_eta_outer_core
-
- integer npoin2D_faces_outer_core(NUMFACES_SHARED)
- integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_outer_core,npoin2D_eta_outer_core
-
-! communication pattern for faces between chunks
- integer, dimension(NUMMSGS_FACES_VAL) :: iprocfrom_faces,iprocto_faces
-
-! communication pattern for corners between chunks
- integer, dimension(NCORNERSCHUNKS_VAL) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
-
-! indirect addressing for each message for faces and corners of the chunks
-! a given slice can belong to at most one corner and at most two faces
- integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED) :: iboolfaces_outer_core
-
-! buffers for send and receive between faces of the slices and the chunks
-! we use the same buffers to assemble scalars and vectors because vectors are
-! always three times bigger and therefore scalars can use the first part
-! of the vector buffer in memory even if it has an additional index here
-! allocate these automatic arrays in the memory stack to avoid memory fragmentation with "allocate()"
- integer :: npoin2D_max_all_CM_IC
- real(kind=CUSTOM_REAL), dimension(NDIM,npoin2D_max_all_CM_IC,NUMFACES_SHARED) :: buffer_send_faces,buffer_received_faces
-
- integer, dimension(NGLOB1D_RADIAL_OC,NUMCORNERS_SHARED) :: iboolcorner_outer_core
-
- real(kind=CUSTOM_REAL), dimension(NGLOB1D_RADIAL_OC) :: buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar
-
- logical, dimension(NSPEC_OUTER_CORE) :: is_on_a_slice_edge_outer_core
-
- integer :: iphase,icall
-
- integer :: computed_elements
-
! ****************************************************
! big loop over all spectral elements in the fluid
! ****************************************************
- if (NSPEC_OUTER_CORE_ADJOINT /= 1 .and. icall == 1) div_displfluid(:,:,:,:) = 0._CUSTOM_REAL
+ if (NSPEC_OUTER_CORE_ADJOINT /= 1 .and. (.not. phase_is_inner) ) div_displfluid(:,:,:,:) = 0._CUSTOM_REAL
- computed_elements = 0
+! computed_elements = 0
+ if( .not. phase_is_inner ) then
+ iphase = 1
+ num_elements = nspec_outer
+ else
+ iphase = 2
+ num_elements = nspec_inner
+ endif
- do ispec = 1,NSPEC_OUTER_CORE
+ do ispec_p = 1,num_elements
-! hide communications by computing the edges first
- if((icall == 2 .and. is_on_a_slice_edge_outer_core(ispec)) .or. &
- (icall == 1 .and. .not. is_on_a_slice_edge_outer_core(ispec))) cycle
+ ispec = phase_ispec_inner(ispec_p,iphase)
-! process the communications every ELEMENTS_NONBLOCKING elements
- computed_elements = computed_elements + 1
- if (USE_NONBLOCKING_COMMS .and. icall == 2 .and. mod(computed_elements,ELEMENTS_NONBLOCKING_OC) == 0 .and. iphase <= 7) &
- call assemble_MPI_scalar(myrank,accelfluid,NGLOB_OUTER_CORE, &
- iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
- npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
- iboolfaces_outer_core,iboolcorner_outer_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
- NUMMSGS_FACES_VAL,NCORNERSCHUNKS_VAL, &
- NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL_OC, &
- NGLOB2DMAX_XMIN_XMAX_OC,NGLOB2DMAX_YMIN_YMAX_OC,NGLOB2DMAX_XY_VAL,NCHUNKS_VAL,iphase)
+ ! only compute element which belong to current phase (inner or outer elements)
do k=1,NGLLZ
do j=1,NGLLY
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_kernels.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_kernels.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_kernels.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -26,11 +26,306 @@
!=====================================================================
+ subroutine compute_kernels()
+
+! kernel calculations
+
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
+ use specfem_par_movie
+ implicit none
+
+ ! local parameters
+ ! dummy array that does not need to be actually read
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,1) :: dummy_array
+ logical,dimension(:),allocatable:: dummy_ispec_is_tiso
+
+ ! crust mantle
+ call compute_kernels_crust_mantle(ibool_crust_mantle, &
+ rho_kl_crust_mantle,beta_kl_crust_mantle, &
+ alpha_kl_crust_mantle,cijkl_kl_crust_mantle, &
+ accel_crust_mantle,b_displ_crust_mantle, &
+ epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
+ epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle, &
+ b_epsilondev_xx_crust_mantle,b_epsilondev_yy_crust_mantle,b_epsilondev_xy_crust_mantle, &
+ b_epsilondev_xz_crust_mantle,b_epsilondev_yz_crust_mantle, &
+ eps_trace_over_3_crust_mantle,b_eps_trace_over_3_crust_mantle, &
+ deltat)
+
+ ! outer core
+ call compute_kernels_outer_core(ibool_outer_core, &
+ xix_outer_core,xiy_outer_core,xiz_outer_core, &
+ etax_outer_core,etay_outer_core,etaz_outer_core, &
+ gammax_outer_core,gammay_outer_core,gammaz_outer_core, &
+ hprime_xx,hprime_yy,hprime_zz, &
+ displ_outer_core,accel_outer_core, &
+ b_displ_outer_core,b_accel_outer_core, &
+ vector_accel_outer_core,vector_displ_outer_core, &
+ b_vector_displ_outer_core, &
+ div_displ_outer_core,b_div_displ_outer_core, &
+ rhostore_outer_core,kappavstore_outer_core, &
+ rho_kl_outer_core,alpha_kl_outer_core, &
+ deviatoric_outercore,nspec_beta_kl_outer_core,beta_kl_outer_core, &
+ deltat)
+
+ ! inner core
+ call compute_kernels_inner_core(ibool_inner_core, &
+ rho_kl_inner_core,beta_kl_inner_core, &
+ alpha_kl_inner_core, &
+ accel_inner_core,b_displ_inner_core, &
+ epsilondev_xx_inner_core,epsilondev_yy_inner_core,epsilondev_xy_inner_core, &
+ epsilondev_xz_inner_core,epsilondev_yz_inner_core, &
+ b_epsilondev_xx_inner_core,b_epsilondev_yy_inner_core,b_epsilondev_xy_inner_core, &
+ b_epsilondev_xz_inner_core,b_epsilondev_yz_inner_core, &
+ eps_trace_over_3_inner_core,b_eps_trace_over_3_inner_core, &
+ deltat)
+
+!<YANGL
+ ! NOISE TOMOGRAPHY --- source strength kernel
+ if (NOISE_TOMOGRAPHY == 3) &
+ call compute_kernels_strength_noise(nmovie_points,ibool_crust_mantle, &
+ Sigma_kl_crust_mantle,displ_crust_mantle,deltat,it, &
+ normal_x_noise,normal_y_noise,normal_z_noise, &
+ NSPEC2D_TOP(IREGION_CRUST_MANTLE),noise_surface_movie, &
+ ibelm_top_crust_mantle)
+!>YANGL
+
+ ! --- boundary kernels ------
+ if (SAVE_BOUNDARY_MESH) then
+ fluid_solid_boundary = .false.
+ iregion_code = IREGION_CRUST_MANTLE
+
+ ! Moho
+ if (.not. SUPPRESS_CRUSTAL_MESH .and. HONOR_1D_SPHERICAL_MOHO) then
+ call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
+ b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
+ ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
+ ! -- idoubling_crust_mantle, &
+ xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+ etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
+ gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
+ kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
+ c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
+ c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
+ c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
+ c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
+ c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
+ c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
+ k_top,ibelm_moho_top,normal_moho,moho_kl_top,fluid_solid_boundary,NSPEC2D_MOHO)
+
+ call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
+ b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
+ ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
+ ! --idoubling_crust_mantle, &
+ xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+ etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
+ gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
+ kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
+ c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
+ c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
+ c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
+ c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
+ c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
+ c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
+ k_bot,ibelm_moho_bot,normal_moho,moho_kl_bot,fluid_solid_boundary,NSPEC2D_MOHO)
+
+ moho_kl = moho_kl + (moho_kl_top - moho_kl_bot) * deltat
+ endif
+
+ ! 400
+ call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
+ b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
+ ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
+ ! --idoubling_crust_mantle, &
+ xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+ etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
+ gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
+ kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
+ c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
+ c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
+ c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
+ c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
+ c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
+ c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
+ k_top,ibelm_400_top,normal_400,d400_kl_top,fluid_solid_boundary,NSPEC2D_400)
+
+ call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
+ b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
+ ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
+ ! --idoubling_crust_mantle, &
+ xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+ etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
+ gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
+ kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
+ c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
+ c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
+ c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
+ c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
+ c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
+ c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
+ k_bot,ibelm_400_bot,normal_400,d400_kl_bot,fluid_solid_boundary,NSPEC2D_400)
+
+ d400_kl = d400_kl + (d400_kl_top - d400_kl_bot) * deltat
+
+ ! 670
+ call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
+ b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
+ ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
+ ! --idoubling_crust_mantle, &
+ xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+ etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
+ gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
+ kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
+ c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
+ c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
+ c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
+ c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
+ c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
+ c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
+ k_top,ibelm_670_top,normal_670,d670_kl_top,fluid_solid_boundary,NSPEC2D_670)
+
+ call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
+ b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
+ ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
+ ! --idoubling_crust_mantle, &
+ xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+ etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
+ gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
+ kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
+ c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
+ c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
+ c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
+ c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
+ c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
+ c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
+ k_bot,ibelm_670_bot,normal_670,d670_kl_bot,fluid_solid_boundary,NSPEC2D_670)
+
+ d670_kl = d670_kl + (d670_kl_top - d670_kl_bot) * deltat
+
+ ! CMB
+ fluid_solid_boundary = .true.
+ iregion_code = IREGION_CRUST_MANTLE
+ call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
+ b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
+ ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
+ ! -- idoubling_crust_mantle, &
+ xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+ etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
+ gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_crust_mantle,kappavstore_crust_mantle, muvstore_crust_mantle, &
+ kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
+ c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
+ c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
+ c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
+ c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
+ c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
+ c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
+ k_top,ibelm_bottom_crust_mantle,normal_top_outer_core, &
+ cmb_kl_top,fluid_solid_boundary,NSPEC2D_CMB)
+
+ iregion_code = IREGION_OUTER_CORE
+ ! dummy allocation
+ allocate(dummy_ispec_is_tiso(NSPEC_OUTER_CORE))
+ dummy_ispec_is_tiso(:) = .false.
+ call compute_boundary_kernel(vector_displ_outer_core,vector_accel_outer_core, &
+ b_vector_displ_outer_core,nspec_outer_core, &
+ iregion_code,ystore_outer_core,zstore_outer_core,ibool_outer_core,dummy_ispec_is_tiso, &
+ ! --idoubling_outer_core, &
+ xix_outer_core,xiy_outer_core,xiz_outer_core, &
+ etax_outer_core,etay_outer_core,etaz_outer_core,&
+ gammax_outer_core,gammay_outer_core,gammaz_outer_core,hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_outer_core,kappavstore_outer_core,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ k_bot,ibelm_top_outer_core,normal_top_outer_core, &
+ cmb_kl_bot,fluid_solid_boundary,NSPEC2D_CMB)
+
+ cmb_kl = cmb_kl + (cmb_kl_top - cmb_kl_bot) * deltat
+
+ ! ICB
+ fluid_solid_boundary = .true.
+ call compute_boundary_kernel(vector_displ_outer_core,vector_accel_outer_core, &
+ b_vector_displ_outer_core,nspec_outer_core, &
+ iregion_code,ystore_outer_core,zstore_outer_core,ibool_outer_core,dummy_ispec_is_tiso, &
+ ! --idoubling_outer_core, &
+ xix_outer_core,xiy_outer_core,xiz_outer_core, &
+ etax_outer_core,etay_outer_core,etaz_outer_core,&
+ gammax_outer_core,gammay_outer_core,gammaz_outer_core,hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_outer_core,kappavstore_outer_core,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ k_top,ibelm_bottom_outer_core,normal_bottom_outer_core, &
+ icb_kl_top,fluid_solid_boundary,NSPEC2D_ICB)
+
+ deallocate(dummy_ispec_is_tiso)
+
+ ! ICB
+ iregion_code = IREGION_INNER_CORE
+ ! dummy allocation
+ allocate(dummy_ispec_is_tiso(NSPEC_INNER_CORE))
+ dummy_ispec_is_tiso(:) = .false.
+ call compute_boundary_kernel(displ_inner_core,accel_inner_core, &
+ b_displ_inner_core,nspec_inner_core,iregion_code, &
+ ystore_inner_core,zstore_inner_core,ibool_inner_core,dummy_ispec_is_tiso, &
+ ! -- idoubling_inner_core, &
+ xix_inner_core,xiy_inner_core,xiz_inner_core, &
+ etax_inner_core,etay_inner_core,etaz_inner_core,&
+ gammax_inner_core,gammay_inner_core,gammaz_inner_core,hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_inner_core,kappavstore_inner_core,muvstore_inner_core, &
+ dummy_array,dummy_array,dummy_array, &
+ c11store_inner_core,c12store_inner_core,c13store_inner_core,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array,dummy_array, &
+ c33store_inner_core,dummy_array,dummy_array, &
+ dummy_array,c44store_inner_core,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ k_bot,ibelm_top_inner_core,normal_bottom_outer_core, &
+ icb_kl_bot,fluid_solid_boundary,NSPEC2D_ICB)
+ deallocate(dummy_ispec_is_tiso)
+ icb_kl = icb_kl + (icb_kl_top - icb_kl_bot) * deltat
+ endif
+
+ ! approximate hessian
+ if( APPROXIMATE_HESS_KL ) then
+ call compute_kernels_hessian(ibool_crust_mantle, &
+ hess_kl_crust_mantle,&
+ accel_crust_mantle,b_accel_crust_mantle, &
+ deltat)
+ endif
+
+
+ end subroutine compute_kernels
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
subroutine compute_kernels_crust_mantle(ibool_crust_mantle, &
rho_kl_crust_mantle,beta_kl_crust_mantle, &
alpha_kl_crust_mantle,cijkl_kl_crust_mantle, &
accel_crust_mantle,b_displ_crust_mantle, &
- epsilondev_crust_mantle,b_epsilondev_crust_mantle, &
+ epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
+ epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle, &
+ b_epsilondev_xx_crust_mantle,b_epsilondev_yy_crust_mantle,b_epsilondev_xy_crust_mantle, &
+ b_epsilondev_xz_crust_mantle,b_epsilondev_yz_crust_mantle, &
eps_trace_over_3_crust_mantle,b_eps_trace_over_3_crust_mantle, &
deltat)
@@ -53,11 +348,18 @@
real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE_ADJOINT) :: &
b_displ_crust_mantle
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_OR_ATT) :: &
- epsilondev_crust_mantle
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
- b_epsilondev_crust_mantle
+! real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_OR_ATT) :: &
+! epsilondev_crust_mantle
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_OR_ATT) :: &
+ epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
+ epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle
+! real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
+! b_epsilondev_crust_mantle
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
+ b_epsilondev_xx_crust_mantle,b_epsilondev_yy_crust_mantle,b_epsilondev_xy_crust_mantle, &
+ b_epsilondev_xz_crust_mantle,b_epsilondev_yz_crust_mantle
+
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STRAIN_ONLY) :: &
eps_trace_over_3_crust_mantle
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
@@ -95,9 +397,18 @@
+ accel_crust_mantle(2,iglob) * b_displ_crust_mantle(2,iglob) &
+ accel_crust_mantle(3,iglob) * b_displ_crust_mantle(3,iglob) )
- epsilondev_loc(:) = epsilondev_crust_mantle(:,i,j,k,ispec)
- b_epsilondev_loc(:) = b_epsilondev_crust_mantle(:,i,j,k,ispec)
+ epsilondev_loc(1) = epsilondev_xx_crust_mantle(i,j,k,ispec)
+ epsilondev_loc(2) = epsilondev_yy_crust_mantle(i,j,k,ispec)
+ epsilondev_loc(3) = epsilondev_xy_crust_mantle(i,j,k,ispec)
+ epsilondev_loc(4) = epsilondev_xz_crust_mantle(i,j,k,ispec)
+ epsilondev_loc(5) = epsilondev_yz_crust_mantle(i,j,k,ispec)
+ b_epsilondev_loc(1) = b_epsilondev_xx_crust_mantle(i,j,k,ispec)
+ b_epsilondev_loc(2) = b_epsilondev_yy_crust_mantle(i,j,k,ispec)
+ b_epsilondev_loc(3) = b_epsilondev_xy_crust_mantle(i,j,k,ispec)
+ b_epsilondev_loc(4) = b_epsilondev_xz_crust_mantle(i,j,k,ispec)
+ b_epsilondev_loc(5) = b_epsilondev_yz_crust_mantle(i,j,k,ispec)
+
! For anisotropic kernels
if (ANISOTROPIC_KL) then
@@ -446,7 +757,10 @@
rho_kl_inner_core,beta_kl_inner_core, &
alpha_kl_inner_core, &
accel_inner_core,b_displ_inner_core, &
- epsilondev_inner_core,b_epsilondev_inner_core, &
+ epsilondev_xx_inner_core,epsilondev_yy_inner_core,epsilondev_xy_inner_core, &
+ epsilondev_xz_inner_core,epsilondev_yz_inner_core, &
+ b_epsilondev_xx_inner_core,b_epsilondev_yy_inner_core,b_epsilondev_xy_inner_core, &
+ b_epsilondev_xz_inner_core,b_epsilondev_yz_inner_core, &
eps_trace_over_3_inner_core,b_eps_trace_over_3_inner_core, &
deltat)
@@ -466,11 +780,20 @@
real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE_ADJOINT) :: &
b_displ_inner_core
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_OR_ATT) :: &
- epsilondev_inner_core
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ADJOINT) :: &
- b_epsilondev_inner_core
+! real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_OR_ATT) :: &
+! epsilondev_inner_core
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_OR_ATT) :: &
+ epsilondev_xx_inner_core,epsilondev_yy_inner_core,epsilondev_xy_inner_core, &
+ epsilondev_xz_inner_core,epsilondev_yz_inner_core
+
+! real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ADJOINT) :: &
+! b_epsilondev_inner_core
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ADJOINT) :: &
+ b_epsilondev_xx_inner_core,b_epsilondev_yy_inner_core,b_epsilondev_xy_inner_core, &
+ b_epsilondev_xz_inner_core,b_epsilondev_yz_inner_core
+
+
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STRAIN_ONLY) :: &
eps_trace_over_3_inner_core
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ADJOINT) :: &
@@ -497,8 +820,18 @@
+ accel_inner_core(2,iglob) * b_displ_inner_core(2,iglob) &
+ accel_inner_core(3,iglob) * b_displ_inner_core(3,iglob) )
- epsilondev_loc(:) = epsilondev_inner_core(:,i,j,k,ispec)
- b_epsilondev_loc(:) = b_epsilondev_inner_core(:,i,j,k,ispec)
+ epsilondev_loc(1) = epsilondev_xx_inner_core(i,j,k,ispec)
+ epsilondev_loc(2) = epsilondev_yy_inner_core(i,j,k,ispec)
+ epsilondev_loc(3) = epsilondev_xy_inner_core(i,j,k,ispec)
+ epsilondev_loc(4) = epsilondev_xz_inner_core(i,j,k,ispec)
+ epsilondev_loc(5) = epsilondev_yz_inner_core(i,j,k,ispec)
+
+ b_epsilondev_loc(1) = b_epsilondev_xx_inner_core(i,j,k,ispec)
+ b_epsilondev_loc(2) = b_epsilondev_yy_inner_core(i,j,k,ispec)
+ b_epsilondev_loc(3) = b_epsilondev_xy_inner_core(i,j,k,ispec)
+ b_epsilondev_loc(4) = b_epsilondev_xz_inner_core(i,j,k,ispec)
+ b_epsilondev_loc(5) = b_epsilondev_yz_inner_core(i,j,k,ispec)
+
beta_kl_inner_core(i,j,k,ispec) = beta_kl_inner_core(i,j,k,ispec) &
+ deltat * (epsilondev_loc(1)*b_epsilondev_loc(1) + epsilondev_loc(2)*b_epsilondev_loc(2) &
+ (epsilondev_loc(1)+epsilondev_loc(2)) * (b_epsilondev_loc(1)+b_epsilondev_loc(2)) &
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_seismograms.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_seismograms.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_seismograms.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -191,7 +191,9 @@
!
subroutine compute_seismograms_adjoint(NSOURCES,nrec_local,displ_crust_mantle, &
- eps_trace_over_3_crust_mantle,epsilondev_crust_mantle, &
+ eps_trace_over_3_crust_mantle, &
+ epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
+ epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle, &
nu_source,Mxx,Myy,Mzz,Mxy,Mxz,Myz, &
hxir_store,hetar_store,hgammar_store, &
hpxir_store,hpetar_store,hpgammar_store, &
@@ -215,9 +217,13 @@
displ_crust_mantle
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STRAIN_ONLY) :: &
eps_trace_over_3_crust_mantle
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_OR_ATT) :: &
- epsilondev_crust_mantle
+! real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_OR_ATT) :: &
+! epsilondev_crust_mantle
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_OR_ATT) :: &
+ epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
+ epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle
+
double precision, dimension(NDIM,NDIM,NSOURCES) :: nu_source
double precision, dimension(NSOURCES) :: Mxx,Myy,Mzz,Mxy,Mxz,Myz
@@ -297,11 +303,11 @@
uzd = uzd + dble(displ_crust_mantle(3,iglob))*hlagrange
eps_trace = eps_trace + dble(eps_trace_over_3_crust_mantle(i,j,k,ispec_selected_source(irec)))*hlagrange
- dxx = dxx + dble(epsilondev_crust_mantle(1,i,j,k,ispec_selected_source(irec)))*hlagrange
- dyy = dyy + dble(epsilondev_crust_mantle(2,i,j,k,ispec_selected_source(irec)))*hlagrange
- dxy = dxy + dble(epsilondev_crust_mantle(3,i,j,k,ispec_selected_source(irec)))*hlagrange
- dxz = dxz + dble(epsilondev_crust_mantle(4,i,j,k,ispec_selected_source(irec)))*hlagrange
- dyz = dyz + dble(epsilondev_crust_mantle(5,i,j,k,ispec_selected_source(irec)))*hlagrange
+ dxx = dxx + dble(epsilondev_xx_crust_mantle(i,j,k,ispec_selected_source(irec)))*hlagrange
+ dyy = dyy + dble(epsilondev_yy_crust_mantle(i,j,k,ispec_selected_source(irec)))*hlagrange
+ dxy = dxy + dble(epsilondev_xy_crust_mantle(i,j,k,ispec_selected_source(irec)))*hlagrange
+ dxz = dxz + dble(epsilondev_xz_crust_mantle(i,j,k,ispec_selected_source(irec)))*hlagrange
+ dyz = dyz + dble(epsilondev_yz_crust_mantle(i,j,k,ispec_selected_source(irec)))*hlagrange
displ_s(:,i,j,k) = displ_crust_mantle(:,iglob)
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_crust_mantle.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_crust_mantle.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_crust_mantle.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -25,81 +25,37 @@
!
!=====================================================================
- subroutine compute_stacey_crust_mantle(ichunk,SIMULATION_TYPE, &
- NSTEP,it,SAVE_FORWARD,ibool_crust_mantle, &
- veloc_crust_mantle,accel_crust_mantle,b_accel_crust_mantle, &
- jacobian2D_xmin_crust_mantle,jacobian2D_xmax_crust_mantle, &
- jacobian2D_ymin_crust_mantle,jacobian2D_ymax_crust_mantle, &
- wgllwgll_xz,wgllwgll_yz, &
- normal_xmin_crust_mantle,normal_xmax_crust_mantle, &
- normal_ymin_crust_mantle,normal_ymax_crust_mantle, &
- rho_vp_crust_mantle,rho_vs_crust_mantle, &
- ibelm_xmin_crust_mantle,ibelm_xmax_crust_mantle, &
- ibelm_ymin_crust_mantle,ibelm_ymax_crust_mantle, &
- nimin_crust_mantle,nimax_crust_mantle, &
- njmin_crust_mantle,njmax_crust_mantle, &
- nkmin_xi_crust_mantle,nkmin_eta_crust_mantle, &
- nspec2D_xmin_crust_mantle,nspec2D_xmax_crust_mantle, &
- nspec2D_ymin_crust_mantle,nspec2D_ymax_crust_mantle, &
- reclen_xmin_crust_mantle,reclen_xmax_crust_mantle, &
- reclen_ymin_crust_mantle,reclen_ymax_crust_mantle, &
- nabs_xmin_cm,nabs_xmax_cm,nabs_ymin_cm,nabs_ymax_cm, &
- absorb_xmin_crust_mantle,absorb_xmax_crust_mantle, &
- absorb_ymin_crust_mantle,absorb_ymax_crust_mantle)
+ subroutine compute_stacey_crust_mantle()
- implicit none
+ use constants
- include "constants.h"
- include "OUTPUT_FILES/values_from_mesher.h"
+ use specfem_par,only: &
+ ichunk,SIMULATION_TYPE,SAVE_FORWARD,NSTEP,it, &
+ wgllwgll_xz,wgllwgll_yz,wgllwgll_xy
- integer ichunk,SIMULATION_TYPE
- integer NSTEP,it
- logical SAVE_FORWARD
+ use specfem_par_crustmantle, only: &
+ veloc_crust_mantle,accel_crust_mantle,b_accel_crust_mantle, &
+ ibool_crust_mantle, &
+ jacobian2D_xmin_crust_mantle,jacobian2D_xmax_crust_mantle, &
+ jacobian2D_ymin_crust_mantle,jacobian2D_ymax_crust_mantle, &
+ normal_xmin_crust_mantle,normal_xmax_crust_mantle, &
+ normal_ymin_crust_mantle,normal_ymax_crust_mantle, &
+ rho_vp_crust_mantle,rho_vs_crust_mantle, &
+ ibelm_xmin_crust_mantle,ibelm_xmax_crust_mantle, &
+ ibelm_ymin_crust_mantle,ibelm_ymax_crust_mantle, &
+ nimin_crust_mantle,nimax_crust_mantle, &
+ njmin_crust_mantle,njmax_crust_mantle, &
+ nkmin_xi_crust_mantle,nkmin_eta_crust_mantle, &
+ nspec2D_xmin_crust_mantle,nspec2D_xmax_crust_mantle, &
+ nspec2D_ymin_crust_mantle,nspec2D_ymax_crust_mantle, &
+ reclen_xmin_crust_mantle,reclen_xmax_crust_mantle, &
+ reclen_ymin_crust_mantle,reclen_ymax_crust_mantle, &
+ nabs_xmin_cm,nabs_xmax_cm,nabs_ymin_cm,nabs_ymax_cm, &
+ absorb_xmin_crust_mantle,absorb_xmax_crust_mantle, &
+ absorb_ymin_crust_mantle,absorb_ymax_crust_mantle
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: ibool_crust_mantle
+ implicit none
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE) :: &
- veloc_crust_mantle,accel_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE_ADJOINT) :: &
- b_accel_crust_mantle
-
- real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ,NSPEC2DMAX_XMIN_XMAX_CM) :: &
- jacobian2D_xmin_crust_mantle,jacobian2D_xmax_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ,NSPEC2DMAX_YMIN_YMAX_CM) :: &
- jacobian2D_ymin_crust_mantle,jacobian2D_ymax_crust_mantle
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
- real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
-
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLY,NGLLZ,NSPEC2DMAX_XMIN_XMAX_CM) :: &
- normal_xmin_crust_mantle,normal_xmax_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2DMAX_YMIN_YMAX_CM) :: &
- normal_ymin_crust_mantle,normal_ymax_crust_mantle
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STACEY) :: &
- rho_vp_crust_mantle,rho_vs_crust_mantle
-
- integer, dimension(NSPEC2DMAX_XMIN_XMAX_CM) :: ibelm_xmin_crust_mantle,ibelm_xmax_crust_mantle
- integer, dimension(NSPEC2DMAX_YMIN_YMAX_CM) :: ibelm_ymin_crust_mantle,ibelm_ymax_crust_mantle
-
- integer, dimension(2,NSPEC2DMAX_YMIN_YMAX_CM) :: &
- nimin_crust_mantle,nimax_crust_mantle,nkmin_eta_crust_mantle
- integer, dimension(2,NSPEC2DMAX_XMIN_XMAX_CM) :: &
- njmin_crust_mantle,njmax_crust_mantle,nkmin_xi_crust_mantle
-
- integer nspec2D_xmin_crust_mantle,nspec2D_xmax_crust_mantle, &
- nspec2D_ymin_crust_mantle,nspec2D_ymax_crust_mantle
-
- integer reclen_xmin_crust_mantle,reclen_xmax_crust_mantle,&
- reclen_ymin_crust_mantle,reclen_ymax_crust_mantle
-
- integer nabs_xmin_cm,nabs_xmax_cm,nabs_ymin_cm,nabs_ymax_cm
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLY,NGLLZ,nabs_xmin_cm) :: absorb_xmin_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLY,NGLLZ,nabs_xmax_cm) :: absorb_xmax_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLZ,nabs_ymin_cm) :: absorb_ymin_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLZ,nabs_ymax_cm) :: absorb_ymax_crust_mantle
-
-
! local parameters
real(kind=CUSTOM_REAL) :: weight
real(kind=CUSTOM_REAL) :: vn,vx,vy,vz,nx,ny,nz,tx,ty,tz
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_outer_core.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_outer_core.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_outer_core.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -26,86 +26,39 @@
!=====================================================================
- subroutine compute_stacey_outer_core(ichunk,SIMULATION_TYPE,SAVE_FORWARD, &
- NSTEP,it,ibool_outer_core, &
- veloc_outer_core,accel_outer_core,b_accel_outer_core, &
- vp_outer_core,wgllwgll_xz,wgllwgll_yz,wgllwgll_xy, &
- jacobian2D_bottom_outer_core, &
- jacobian2D_xmin_outer_core,jacobian2D_xmax_outer_core, &
- jacobian2D_ymin_outer_core,jacobian2D_ymax_outer_core, &
- ibelm_bottom_outer_core, &
- ibelm_xmin_outer_core,ibelm_xmax_outer_core, &
- ibelm_ymin_outer_core,ibelm_ymax_outer_core, &
- nimin_outer_core,nimax_outer_core, &
- njmin_outer_core,njmax_outer_core, &
- nkmin_xi_outer_core,nkmin_eta_outer_core, &
- NSPEC2D_BOTTOM, &
- nspec2D_xmin_outer_core,nspec2D_xmax_outer_core, &
- nspec2D_ymin_outer_core,nspec2D_ymax_outer_core, &
- reclen_zmin, &
- reclen_xmin_outer_core,reclen_xmax_outer_core, &
- reclen_ymin_outer_core,reclen_ymax_outer_core, &
- nabs_zmin_oc, &
- nabs_xmin_oc,nabs_xmax_oc,nabs_ymin_oc,nabs_ymax_oc, &
- absorb_zmin_outer_core, &
- absorb_xmin_outer_core,absorb_xmax_outer_core, &
- absorb_ymin_outer_core,absorb_ymax_outer_core)
+ subroutine compute_stacey_outer_core()
- implicit none
+ use constants
- include "constants.h"
- include "OUTPUT_FILES/values_from_mesher.h"
+ use specfem_par,only: &
+ ichunk,SIMULATION_TYPE,SAVE_FORWARD,NSTEP,it, &
+ wgllwgll_xz,wgllwgll_yz,wgllwgll_xy, &
+ NSPEC2D_BOTTOM
- integer ichunk,SIMULATION_TYPE
- integer NSTEP,it
- logical SAVE_FORWARD
+ use specfem_par_outercore,only: &
+ veloc_outer_core,accel_outer_core,b_accel_outer_core, &
+ ibool_outer_core, &
+ jacobian2D_xmin_outer_core,jacobian2D_xmax_outer_core, &
+ jacobian2D_ymin_outer_core,jacobian2D_ymax_outer_core, &
+ jacobian2D_bottom_outer_core, &
+ nspec2D_xmin_outer_core,nspec2D_xmax_outer_core, &
+ nspec2D_ymin_outer_core,nspec2D_ymax_outer_core, &
+ vp_outer_core, &
+ nimin_outer_core,nimax_outer_core,nkmin_eta_outer_core, &
+ njmin_outer_core,njmax_outer_core,nkmin_xi_outer_core, &
+ absorb_xmin_outer_core,absorb_xmax_outer_core, &
+ absorb_ymin_outer_core,absorb_ymax_outer_core, &
+ absorb_zmin_outer_core, &
+ reclen_xmin_outer_core,reclen_xmax_outer_core, &
+ reclen_ymin_outer_core,reclen_ymax_outer_core, &
+ reclen_zmin, &
+ ibelm_xmin_outer_core,ibelm_xmax_outer_core, &
+ ibelm_ymin_outer_core,ibelm_ymax_outer_core, &
+ ibelm_bottom_outer_core
+ implicit none
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE) :: ibool_outer_core
-
- real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE) :: &
- veloc_outer_core,accel_outer_core
- real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE_ADJOINT) :: &
- b_accel_outer_core
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_STACEY) :: vp_outer_core
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
- real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_BOTTOM_OC) :: &
- jacobian2D_bottom_outer_core
- real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ,NSPEC2DMAX_XMIN_XMAX_OC) :: &
- jacobian2D_xmin_outer_core,jacobian2D_xmax_outer_core
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ,NSPEC2DMAX_YMIN_YMAX_OC) :: &
- jacobian2D_ymin_outer_core,jacobian2D_ymax_outer_core
-
-
- integer, dimension(NSPEC2D_BOTTOM_OC) :: ibelm_bottom_outer_core
- integer, dimension(NSPEC2DMAX_XMIN_XMAX_OC) :: ibelm_xmin_outer_core,ibelm_xmax_outer_core
- integer, dimension(NSPEC2DMAX_YMIN_YMAX_OC) :: ibelm_ymin_outer_core,ibelm_ymax_outer_core
-
- integer, dimension(2,NSPEC2DMAX_YMIN_YMAX_OC) :: &
- nimin_outer_core,nimax_outer_core,nkmin_eta_outer_core
- integer, dimension(2,NSPEC2DMAX_XMIN_XMAX_OC) :: &
- njmin_outer_core,njmax_outer_core,nkmin_xi_outer_core
-
- integer, dimension(MAX_NUM_REGIONS) :: NSPEC2D_BOTTOM
- integer nspec2D_xmin_outer_core,nspec2D_xmax_outer_core, &
- nspec2D_ymin_outer_core,nspec2D_ymax_outer_core
-
- integer reclen_zmin,reclen_xmin_outer_core,reclen_xmax_outer_core,&
- reclen_ymin_outer_core,reclen_ymax_outer_core
-
- integer nabs_xmin_oc,nabs_xmax_oc,nabs_ymin_oc,nabs_ymax_oc,nabs_zmin_oc
- real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ,nabs_xmin_oc) :: absorb_xmin_outer_core
- real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ,nabs_xmax_oc) :: absorb_xmax_outer_core
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ,nabs_ymin_oc) :: absorb_ymin_outer_core
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ,nabs_ymax_oc) :: absorb_ymax_outer_core
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,nabs_zmin_oc) :: absorb_zmin_outer_core
-
! local parameters
real(kind=CUSTOM_REAL) :: sn,weight
- !integer :: reclen1,reclen2
integer :: i,j,k,ispec2D,ispec,iglob
! note: we use c functions for I/O as they still have a better performance than
@@ -124,14 +77,7 @@
if (SIMULATION_TYPE == 3 .and. nspec2D_xmin_outer_core > 0) then
! note: backward/reconstructed wavefields are read in after the Newmark time scheme in the first time loop
! this leads to a corresponding boundary condition at time index NSTEP - (it-1) = NSTEP - it + 1
-
call read_abs(4,absorb_xmin_outer_core,reclen_xmin_outer_core,NSTEP-it+1)
-
-! read(61,rec=NSTEP-it+1) reclen1,absorb_xmin_outer_core,reclen2
-! if (reclen1 /= reclen_xmin_outer_core .or. reclen1 /= reclen2) &
-! call exit_MPI(myrank,'Error reading absorbing contribution absorb_xmin_outer_core')
-
-
endif
do ispec2D=1,nspec2D_xmin_outer_core
@@ -163,10 +109,7 @@
! writes absorbing boundary values
if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. nspec2D_xmin_outer_core > 0 ) then
-
call write_abs(4,absorb_xmin_outer_core,reclen_xmin_outer_core,it)
-
-! write(61,rec=it) reclen_xmin_outer_core,absorb_xmin_outer_core,reclen_xmin_outer_core
endif
endif
@@ -176,12 +119,7 @@
if(NCHUNKS_VAL == 1 .or. ichunk == CHUNK_AB) then
if (SIMULATION_TYPE == 3 .and. nspec2D_xmax_outer_core > 0) then
-
call read_abs(5,absorb_xmax_outer_core,reclen_xmax_outer_core,NSTEP-it+1)
-
-! read(62,rec=NSTEP-it+1) reclen1,absorb_xmax_outer_core,reclen2
-! if (reclen1 /= reclen_xmax_outer_core .or. reclen1 /= reclen2) &
-! call exit_MPI(myrank,'Error reading absorbing contribution absorb_xmax_outer_core')
endif
do ispec2D=1,nspec2D_xmax_outer_core
@@ -214,20 +152,13 @@
if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. nspec2D_xmax_outer_core > 0 ) then
call write_abs(5,absorb_xmax_outer_core,reclen_xmax_outer_core,it)
-
-! write(62,rec=it) reclen_xmax_outer_core,absorb_xmax_outer_core,reclen_xmax_outer_core
endif
endif
! ymin
if (SIMULATION_TYPE == 3 .and. nspec2D_ymin_outer_core > 0) then
-
call read_abs(6,absorb_ymin_outer_core,reclen_ymin_outer_core,NSTEP-it+1)
-
-! read(63,rec=NSTEP-it+1) reclen1,absorb_ymin_outer_core,reclen2
-! if (reclen1 /= reclen_ymin_outer_core .or. reclen1 /= reclen2) &
-! call exit_MPI(myrank,'Error reading absorbing contribution absorb_ymin_outer_core')
endif
do ispec2D=1,nspec2D_ymin_outer_core
@@ -260,19 +191,13 @@
if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. nspec2D_ymin_outer_core > 0 ) then
call write_abs(6,absorb_ymin_outer_core,reclen_ymin_outer_core,it)
-
-! write(63,rec=it) reclen_ymin_outer_core,absorb_ymin_outer_core,reclen_ymin_outer_core
endif
! ymax
if (SIMULATION_TYPE == 3 .and. nspec2D_ymax_outer_core > 0) then
-
call read_abs(7,absorb_ymax_outer_core,reclen_ymax_outer_core,NSTEP-it+1)
-
-! read(64,rec=NSTEP-it+1) reclen1,absorb_ymax_outer_core,reclen2
-! if (reclen1 /= reclen_ymax_outer_core .or. reclen1 /= reclen2) &
-! call exit_MPI(myrank,'Error reading absorbing contribution absorb_ymax_outer_core')
endif
+
do ispec2D=1,nspec2D_ymax_outer_core
ispec=ibelm_ymax_outer_core(ispec2D)
@@ -303,18 +228,11 @@
if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. nspec2D_ymax_outer_core > 0 ) then
call write_abs(7,absorb_ymax_outer_core,reclen_ymax_outer_core,it)
-
-! write(64,rec=it) reclen_ymax_outer_core,absorb_ymax_outer_core,reclen_ymax_outer_core
endif
! for surface elements exactly on the ICB
if (SIMULATION_TYPE == 3 .and. NSPEC2D_BOTTOM(IREGION_OUTER_CORE)> 0) then
-
call read_abs(8,absorb_zmin_outer_core,reclen_zmin,NSTEP-it+1)
-
-! read(65,rec=NSTEP-it+1) reclen1,absorb_zmin_outer_core,reclen2
-! if (reclen1 /= reclen_zmin .or. reclen1 /= reclen2) &
-! call exit_MPI(myrank,'Error reading absorbing contribution absorb_zmin_outer_core')
endif
do ispec2D = 1,NSPEC2D_BOTTOM(IREGION_OUTER_CORE)
@@ -344,8 +262,6 @@
if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. NSPEC2D_BOTTOM(IREGION_OUTER_CORE) > 0 ) then
call write_abs(8,absorb_zmin_outer_core,reclen_zmin,it)
-
-! write(65,rec=it) reclen_zmin,absorb_zmin_outer_core,reclen_zmin
endif
end subroutine compute_stacey_outer_core
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/finalize_simulation.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/finalize_simulation.f90 (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/finalize_simulation.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -0,0 +1,283 @@
+!=====================================================================
+!
+! S p e c f e m 3 D G l o b e V e r s i o n 5 . 1
+! --------------------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA
+! and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+ subroutine finalize_simulation()
+
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
+ use specfem_par_movie
+ implicit none
+
+ ! synchronize all processes, waits until all processes have written their seismograms
+ call sync_all()
+
+ ! closes Stacey absorbing boundary snapshots
+ if( ABSORBING_CONDITIONS ) then
+ ! crust mantle
+ if (nspec2D_xmin_crust_mantle > 0 .and. (SIMULATION_TYPE == 3 &
+ .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
+ call close_file_abs(0)
+ endif
+
+ if (nspec2D_xmax_crust_mantle > 0 .and. (SIMULATION_TYPE == 3 &
+ .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
+ call close_file_abs(1)
+ endif
+
+ if (nspec2D_ymin_crust_mantle > 0 .and. (SIMULATION_TYPE == 3 &
+ .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
+ call close_file_abs(2)
+ endif
+
+ if (nspec2D_ymax_crust_mantle > 0 .and. (SIMULATION_TYPE == 3 &
+ .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
+ call close_file_abs(3)
+ endif
+
+ ! outer core
+ if (nspec2D_xmin_outer_core > 0 .and. (SIMULATION_TYPE == 3 &
+ .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
+ call close_file_abs(4)
+ endif
+
+ if (nspec2D_xmax_outer_core > 0 .and. (SIMULATION_TYPE == 3 &
+ .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
+ call close_file_abs(5)
+ endif
+
+ if (nspec2D_ymin_outer_core > 0 .and. (SIMULATION_TYPE == 3 &
+ .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
+ call close_file_abs(6)
+ endif
+
+ if (nspec2D_ymax_outer_core > 0 .and. (SIMULATION_TYPE == 3 &
+ .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
+ call close_file_abs(7)
+ endif
+
+ if (NSPEC2D_BOTTOM(IREGION_OUTER_CORE) > 0 .and. (SIMULATION_TYPE == 3 &
+ .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
+ call close_file_abs(8)
+ endif
+
+ ! frees memory
+ deallocate(absorb_xmin_crust_mantle, &
+ absorb_xmax_crust_mantle, &
+ absorb_ymin_crust_mantle, &
+ absorb_ymax_crust_mantle, &
+ absorb_xmin_outer_core, &
+ absorb_xmax_outer_core, &
+ absorb_ymin_outer_core, &
+ absorb_ymax_outer_core, &
+ absorb_zmin_outer_core)
+ endif
+
+ ! save/read the surface movie using the same c routine as we do for absorbing boundaries (file ID is 9)
+ if (NOISE_TOMOGRAPHY/=0) then
+ call close_file_abs(9)
+ endif
+
+ ! synchronize all processes
+ call sync_all()
+
+ ! save files to local disk or tape system if restart file
+ call save_forward_arrays()
+
+ ! synchronize all processes
+ call sync_all()
+
+ ! dump kernel arrays
+ if (SIMULATION_TYPE == 3) then
+ ! crust mantle
+ call save_kernels_crust_mantle(myrank,scale_t,scale_displ, &
+ cijkl_kl_crust_mantle,rho_kl_crust_mantle, &
+ alpha_kl_crust_mantle,beta_kl_crust_mantle, &
+ ystore_crust_mantle,zstore_crust_mantle, &
+ rhostore_crust_mantle,muvstore_crust_mantle, &
+ kappavstore_crust_mantle,ibool_crust_mantle, &
+ kappahstore_crust_mantle,muhstore_crust_mantle, &
+ eta_anisostore_crust_mantle,ispec_is_tiso_crust_mantle, &
+ ! --idoubling_crust_mantle, &
+ LOCAL_PATH)
+
+ ! noise strength kernel
+ if (NOISE_TOMOGRAPHY == 3) then
+ call save_kernels_strength_noise(myrank,LOCAL_PATH,Sigma_kl_crust_mantle)
+ endif
+
+ ! outer core
+ call save_kernels_outer_core(myrank,scale_t,scale_displ, &
+ rho_kl_outer_core,alpha_kl_outer_core, &
+ rhostore_outer_core,kappavstore_outer_core, &
+ deviatoric_outercore,nspec_beta_kl_outer_core,beta_kl_outer_core, &
+ LOCAL_PATH)
+
+ ! inner core
+ call save_kernels_inner_core(myrank,scale_t,scale_displ, &
+ rho_kl_inner_core,beta_kl_inner_core,alpha_kl_inner_core, &
+ rhostore_inner_core,muvstore_inner_core,kappavstore_inner_core, &
+ LOCAL_PATH)
+
+ ! boundary kernel
+ if (SAVE_BOUNDARY_MESH) then
+ call save_kernels_boundary_kl(myrank,scale_t,scale_displ, &
+ moho_kl,d400_kl,d670_kl,cmb_kl,icb_kl, &
+ LOCAL_PATH,HONOR_1D_SPHERICAL_MOHO)
+ endif
+
+ ! approximate hessian
+ if( APPROXIMATE_HESS_KL ) then
+ call save_kernels_hessian(myrank,scale_t,scale_displ, &
+ hess_kl_crust_mantle,LOCAL_PATH)
+ endif
+ endif
+
+ ! save source derivatives for adjoint simulations
+ if (SIMULATION_TYPE == 2 .and. nrec_local > 0) then
+ call save_kernels_source_derivatives(nrec_local,NSOURCES,scale_displ,scale_t, &
+ nu_source,moment_der,sloc_der,stshift_der,shdur_der,number_receiver_global)
+ endif
+
+ ! frees dynamically allocated memory
+ ! mpi buffers
+ deallocate(buffer_send_faces, &
+ buffer_received_faces, &
+ b_buffer_send_faces, &
+ b_buffer_received_faces)
+
+ ! central cube buffers
+ deallocate(sender_from_slices_to_cube, &
+ buffer_all_cube_from_slices, &
+ b_buffer_all_cube_from_slices, &
+ buffer_slices, &
+ b_buffer_slices, &
+ buffer_slices2, &
+ ibool_central_cube)
+
+ ! sources
+ deallocate(islice_selected_source, &
+ ispec_selected_source, &
+ Mxx, &
+ Myy, &
+ Mzz, &
+ Mxy, &
+ Mxz, &
+ Myz, &
+ xi_source, &
+ eta_source, &
+ gamma_source, &
+ tshift_cmt, &
+ hdur, &
+ hdur_gaussian, &
+ theta_source, &
+ phi_source, &
+ nu_source)
+ if (SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) deallocate(sourcearrays)
+ if (SIMULATION_TYPE == 2 .or. SIMULATION_TYPE == 3) then
+ deallocate(iadj_vec)
+ if(nadj_rec_local > 0) then
+ deallocate(adj_sourcearrays)
+ deallocate(iadjsrc,iadjsrc_len)
+ endif
+ endif
+
+ ! receivers
+ deallocate(islice_selected_rec, &
+ ispec_selected_rec, &
+ xi_receiver, &
+ eta_receiver, &
+ gamma_receiver, &
+ station_name, &
+ network_name, &
+ stlat, &
+ stlon, &
+ stele, &
+ stbur, &
+ nu, &
+ number_receiver_global)
+ if( nrec_local > 0 ) then
+ deallocate(hxir_store, &
+ hetar_store, &
+ hgammar_store)
+ if( SIMULATION_TYPE == 2 ) then
+ deallocate(moment_der,stshift_der)
+ endif
+ endif
+ deallocate(seismograms)
+
+ if (SIMULATION_TYPE == 3) then
+ if( APPROXIMATE_HESS_KL ) then
+ deallocate(hess_kl_crust_mantle)
+ endif
+ deallocate(beta_kl_outer_core)
+ endif
+
+ ! movies
+ if(MOVIE_SURFACE .or. NOISE_TOMOGRAPHY /= 0 ) then
+ deallocate(store_val_x, &
+ store_val_y, &
+ store_val_z, &
+ store_val_ux, &
+ store_val_uy, &
+ store_val_uz)
+ if (MOVIE_SURFACE) then
+ deallocate(store_val_x_all, &
+ store_val_y_all, &
+ store_val_z_all, &
+ store_val_ux_all, &
+ store_val_uy_all, &
+ store_val_uz_all)
+ endif
+ endif
+ if(MOVIE_VOLUME) then
+ deallocate(nu_3dmovie)
+ endif
+
+ ! noise simulations
+ if ( NOISE_TOMOGRAPHY /= 0 ) then
+ deallocate(noise_sourcearray, &
+ normal_x_noise, &
+ normal_y_noise, &
+ normal_z_noise, &
+ mask_noise, &
+ noise_surface_movie)
+ endif
+
+ ! close the main output file
+ if(myrank == 0) then
+ write(IMAIN,*)
+ write(IMAIN,*) 'End of the simulation'
+ write(IMAIN,*)
+ close(IMAIN)
+ endif
+
+ ! synchronize all the processes to make sure everybody has finished
+ call sync_all()
+
+ end subroutine finalize_simulation
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/fix_non_blocking_flags.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/fix_non_blocking_flags.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/fix_non_blocking_flags.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -102,7 +102,8 @@
! a full face are missing, therefore let us add them
subroutine fix_non_blocking_central_cube(is_on_a_slice_edge, &
ibool,nspec,nglob,nb_msgs_theor_in_cube,ibelm_bottom_inner_core, &
- idoubling_inner_core,npoin2D_cube_from_slices,ibool_central_cube,NSPEC2D_BOTTOM_INNER_CORE,ichunk)
+ idoubling_inner_core,npoin2D_cube_from_slices, &
+ ibool_central_cube,NSPEC2D_BOTTOM_INNER_CORE,ichunk)
implicit none
@@ -144,8 +145,8 @@
if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
-! clean the mask
- mask_ibool(:) = .false.
+ ! clean the mask
+ mask_ibool(:) = .false.
do imsg = 1,nb_msgs_theor_in_cube
do ipoin = 1,npoin2D_cube_from_slices
@@ -153,24 +154,24 @@
enddo
enddo
-! now label all the elements that have at least one corner belonging
-! to any of these buffers as elements that must contribute to the
-! first step of the calculations (performed on the edges before starting
-! the non blocking communications); there is no need to examine the inside
-! of the elements, checking their eight corners is sufficient
- do ispec = 1,nspec
- do k = 1,NGLLZ,NGLLZ-1
- do j = 1,NGLLY,NGLLY-1
- do i = 1,NGLLX,NGLLX-1
- if(mask_ibool(ibool(i,j,k,ispec))) then
- is_on_a_slice_edge(ispec) = .true.
- goto 888
- endif
+ ! now label all the elements that have at least one corner belonging
+ ! to any of these buffers as elements that must contribute to the
+ ! first step of the calculations (performed on the edges before starting
+ ! the non blocking communications); there is no need to examine the inside
+ ! of the elements, checking their eight corners is sufficient
+ do ispec = 1,nspec
+ do k = 1,NGLLZ,NGLLZ-1
+ do j = 1,NGLLY,NGLLY-1
+ do i = 1,NGLLX,NGLLX-1
+ if(mask_ibool(ibool(i,j,k,ispec))) then
+ is_on_a_slice_edge(ispec) = .true.
+ goto 888
+ endif
+ enddo
enddo
enddo
+ 888 continue
enddo
- 888 continue
- enddo
endif
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/initialize_simulation.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/initialize_simulation.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/initialize_simulation.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -26,129 +26,33 @@
!=====================================================================
- subroutine initialize_simulation(myrank,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD,NER_CRUST, &
- NER_80_MOHO,NER_220_80,NER_400_220,NER_600_400,NER_670_600,NER_771_670, &
- NER_TOPDDOUBLEPRIME_771,NER_CMB_TOPDDOUBLEPRIME,NER_OUTER_CORE, &
- NER_TOP_CENTRAL_CUBE_ICB,ANGULAR_WIDTH_XI_IN_DEGREES,NEX_XI,NEX_ETA, &
- NTSTEP_BETWEEN_OUTPUT_SEISMOS, &
- NTSTEP_BETWEEN_READ_ADJSRC,NSTEP,NSOURCES,NTSTEP_BETWEEN_FRAMES, &
- NTSTEP_BETWEEN_OUTPUT_INFO,NUMBER_OF_RUNS,NUMBER_OF_THIS_RUN,SIMULATION_TYPE, &
- DT,ROCEAN,RMIDDLE_CRUST,RMOHO,R80,R220,R400,R600,R670,R771,&
- RTOPDDOUBLEPRIME,RCMB,RICB, &
- RHO_TOP_OC,RHO_BOTTOM_OC,RHO_OCEANS, &
- MOVIE_VOLUME_TYPE,MOVIE_START,MOVIE_STOP, &
- HDUR_MOVIE,MOVIE_TOP,MOVIE_BOTTOM,MOVIE_WEST,MOVIE_EAST, &
- MOVIE_NORTH,MOVIE_SOUTH,MOVIE_SURFACE,MOVIE_VOLUME, &
- RECEIVERS_CAN_BE_BURIED,PRINT_SOURCE_TIME_FUNCTION, &
- SAVE_MESH_FILES,ABSORBING_CONDITIONS,INCLUDE_CENTRAL_CUBE,SAVE_FORWARD, &
- SAVE_ALL_SEISMOS_IN_ONE_FILE,MOVIE_COARSE,OUTPUT_SEISMOS_ASCII_TEXT, &
- OUTPUT_SEISMOS_SAC_ALPHANUM,OUTPUT_SEISMOS_SAC_BINARY, &
- ROTATE_SEISMOGRAMS_RT,WRITE_SEISMOGRAMS_BY_MASTER,USE_BINARY_FOR_LARGE_FILE, &
- LOCAL_PATH,MODEL,OUTPUT_FILES, &
- ratio_sampling_array, ner, doubling_index,r_bottom,r_top, &
- this_region_has_a_doubling,rmins,rmaxs, &
- TOPOGRAPHY,HONOR_1D_SPHERICAL_MOHO,ONE_CRUST, &
- nspl,rspl,espl,espl2,ibathy_topo, &
- NSPEC2DMAX_XMIN_XMAX,NSPEC2DMAX_YMIN_YMAX,NSPEC2D_BOTTOM,NSPEC2D_TOP, &
- NGLOB1D_RADIAL,NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX, &
- xigll,yigll,zigll,wxgll,wygll,wzgll,wgll_cube, &
- hprime_xx,hprime_yy,hprime_zz,hprime_xxT, &
- hprimewgll_xx,hprimewgll_yy,hprimewgll_zz,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- rec_filename,STATIONS,nrec,NOISE_TOMOGRAPHY)
+ subroutine initialize_simulation()
+ use specfem_par
implicit none
include 'mpif.h'
- include "constants.h"
- include "OUTPUT_FILES/values_from_mesher.h"
- integer myrank
-
- ! parameters read from parameter file
- integer MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD,NER_CRUST, &
- NER_80_MOHO,NER_220_80,NER_400_220,NER_600_400,NER_670_600,NER_771_670, &
- NER_TOPDDOUBLEPRIME_771,NER_CMB_TOPDDOUBLEPRIME,NER_OUTER_CORE, &
- NER_TOP_CENTRAL_CUBE_ICB,NEX_XI,NEX_ETA, &
- NTSTEP_BETWEEN_OUTPUT_SEISMOS, &
- NTSTEP_BETWEEN_READ_ADJSRC,NSTEP,NSOURCES,NTSTEP_BETWEEN_FRAMES, &
- NTSTEP_BETWEEN_OUTPUT_INFO,NUMBER_OF_RUNS,NUMBER_OF_THIS_RUN,SIMULATION_TYPE, &
- MOVIE_VOLUME_TYPE,MOVIE_START,MOVIE_STOP,NOISE_TOMOGRAPHY
-
- double precision DT,ROCEAN,RMIDDLE_CRUST, &
- RMOHO,R80,R220,R400,R600,R670,R771,RTOPDDOUBLEPRIME,RCMB,RICB, &
- RHO_TOP_OC,RHO_BOTTOM_OC,RHO_OCEANS,HDUR_MOVIE, &
- MOVIE_TOP,MOVIE_BOTTOM,MOVIE_WEST,MOVIE_EAST,MOVIE_NORTH,MOVIE_SOUTH
-
- logical MOVIE_SURFACE,MOVIE_VOLUME,RECEIVERS_CAN_BE_BURIED,PRINT_SOURCE_TIME_FUNCTION, &
- SAVE_MESH_FILES,ABSORBING_CONDITIONS,INCLUDE_CENTRAL_CUBE,SAVE_FORWARD, &
- SAVE_ALL_SEISMOS_IN_ONE_FILE,MOVIE_COARSE,OUTPUT_SEISMOS_ASCII_TEXT,&
- OUTPUT_SEISMOS_SAC_ALPHANUM,OUTPUT_SEISMOS_SAC_BINARY,&
- ROTATE_SEISMOGRAMS_RT,WRITE_SEISMOGRAMS_BY_MASTER,USE_BINARY_FOR_LARGE_FILE
-
- character(len=150) LOCAL_PATH,OUTPUT_FILES
-
- integer, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: ratio_sampling_array,ner
- integer, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: doubling_index
-
- double precision, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: r_bottom,r_top
-
- logical, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: this_region_has_a_doubling
- double precision, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: rmins,rmaxs
-
-
- ! mesh model parameters
- logical TOPOGRAPHY,HONOR_1D_SPHERICAL_MOHO,ONE_CRUST
- !logical COMPUTE_AND_STORE_STRAIN
-
- ! for ellipticity
- integer nspl
- double precision rspl(NR),espl(NR),espl2(NR)
-
- ! use integer array to store values
- integer, dimension(NX_BATHY,NY_BATHY) :: ibathy_topo
-
- integer, dimension(MAX_NUM_REGIONS) :: NSPEC2DMAX_XMIN_XMAX,NSPEC2DMAX_YMIN_YMAX, &
- NSPEC2D_BOTTOM,NSPEC2D_TOP, &
- NGLOB1D_RADIAL,NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX
-
- ! Gauss-Lobatto-Legendre points of integration and weights
- double precision, dimension(NGLLX) :: xigll,wxgll
- double precision, dimension(NGLLY) :: yigll,wygll
- double precision, dimension(NGLLZ) :: zigll,wzgll
- ! product of weights for gravity term
- double precision, dimension(NGLLX,NGLLY,NGLLZ) :: wgll_cube
- ! array with derivatives of Lagrange polynomials and precalculated products
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprimewgll_xx
- real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLY) :: hprime_yy,hprimewgll_yy
- real(kind=CUSTOM_REAL), dimension(NGLLZ,NGLLZ) :: hprime_zz,hprimewgll_zz
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xxT,hprimewgll_xxT
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
- real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
-
- character(len=150) rec_filename,STATIONS
- integer nrec
-
! local parameters
integer, dimension(MAX_NUM_REGIONS) :: NSPEC_computed,NGLOB_computed, &
- NSPEC2D_XI,NSPEC2D_ETA,NSPEC1D_RADIAL
+ NSPEC2D_XI,NSPEC2D_ETA,NSPEC1D_RADIAL
logical :: CUT_SUPERBRICK_XI,CUT_SUPERBRICK_ETA
integer, dimension(NB_SQUARE_CORNERS,NB_CUT_CASE) :: DIFF_NSPEC1D_RADIAL
integer, dimension(NB_SQUARE_EDGES_ONEDIR,NB_CUT_CASE) :: DIFF_NSPEC2D_XI,DIFF_NSPEC2D_ETA
integer :: ratio_divide_central_cube
integer :: sizeprocs
- integer :: ier,i,j,ios
+ integer :: ier,ios
integer :: NPROC,NPROCTOT,NEX_PER_PROC_XI,NEX_PER_PROC_ETA,NCHUNKS,NPROC_XI,NPROC_ETA
double precision :: RMOHO_FICTITIOUS_IN_MESHER,R120,R_CENTRAL_CUBE,CENTER_LONGITUDE_IN_DEGREES,&
- CENTER_LATITUDE_IN_DEGREES,ANGULAR_WIDTH_ETA_IN_DEGREES,ANGULAR_WIDTH_XI_IN_DEGREES,&
- GAMMA_ROTATION_AZIMUTH
+ CENTER_LATITUDE_IN_DEGREES,ANGULAR_WIDTH_ETA_IN_DEGREES,&
+ GAMMA_ROTATION_AZIMUTH
integer :: REFERENCE_1D_MODEL,THREE_D_MODEL
logical :: TRANSVERSE_ISOTROPY,ANISOTROPIC_3D_MANTLE,ANISOTROPIC_INNER_CORE,OCEANS, &
ATTENUATION,ATTENUATION_3D,ROTATION,ELLIPTICITY,GRAVITY,CASE_3D,ISOTROPIC_3D_MANTLE, &
HETEROGEN_3D_MANTLE,CRUSTAL,INFLATE_CENTRAL_CUBE
- character(len=150) :: MODEL,dummystring
+ character(len=150) :: dummystring
integer, external :: err_occurred
+
! sizeprocs returns number of processes started (should be equal to NPROCTOT).
! myrank is the rank of each process, between 0 and sizeprocs-1.
! as usual in MPI, process 0 is in charge of coordinating everything
@@ -196,6 +100,10 @@
call exit_MPI(myrank,'an error occurred while reading the parameter file')
endif
+ ! GPU_MODE is in par_file
+ ! parameter is optional, may not be in the Par_file
+ call read_gpu_mode(GPU_MODE)
+
endif
! distributes parameters from master to all processes
@@ -232,6 +140,10 @@
HONOR_1D_SPHERICAL_MOHO,CRUSTAL,ONE_CRUST,CASE_3D,TRANSVERSE_ISOTROPY, &
ISOTROPIC_3D_MANTLE,ANISOTROPIC_3D_MANTLE,HETEROGEN_3D_MANTLE, &
ATTENUATION,ATTENUATION_3D,ANISOTROPIC_INNER_CORE,NOISE_TOMOGRAPHY)
+
+ ! broadcasts GPU_MODE
+ call broadcast_gpu_parameters(myrank,GPU_MODE)
+
! get the base pathname for output files
call get_value_string(OUTPUT_FILES, 'OUTPUT_FILES', 'OUTPUT_FILES')
@@ -326,6 +238,59 @@
endif
+ ! checks flags
+ call initialize_simulation_check(sizeprocs,NPROCTOT,NSPEC_COMPUTED, &
+ ATTENUATION,ATTENUATION_3D,NCHUNKS,GRAVITY,ROTATION, &
+ ELLIPTICITY,OCEANS,NPROC_XI,NPROC_ETA, &
+ TRANSVERSE_ISOTROPY,ANISOTROPIC_3D_MANTLE, &
+ ANISOTROPIC_INNER_CORE)
+
+ ! counts receiver stations
+ if (SIMULATION_TYPE == 1) then
+ rec_filename = 'DATA/STATIONS'
+ else
+ rec_filename = 'DATA/STATIONS_ADJOINT'
+ endif
+ call get_value_string(STATIONS, 'solver.STATIONS', rec_filename)
+
+ ! get total number of receivers
+ if(myrank == 0) then
+ open(unit=IIN,file=STATIONS,iostat=ios,status='old',action='read')
+ nrec = 0
+ do while(ios == 0)
+ read(IIN,"(a)",iostat=ios) dummystring
+ if(ios == 0) nrec = nrec + 1
+ enddo
+ close(IIN)
+ endif
+
+ ! broadcast the information read on the master to the nodes
+ call MPI_BCAST(nrec,1,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
+ if(nrec < 1) call exit_MPI(myrank,trim(STATIONS)//': need at least one receiver')
+
+ end subroutine initialize_simulation
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine initialize_simulation_check(sizeprocs,NPROCTOT,NSPEC_COMPUTED, &
+ ATTENUATION,ATTENUATION_3D,NCHUNKS,GRAVITY,ROTATION, &
+ ELLIPTICITY,OCEANS,NPROC_XI,NPROC_ETA, &
+ TRANSVERSE_ISOTROPY,ANISOTROPIC_3D_MANTLE, &
+ ANISOTROPIC_INNER_CORE)
+
+ use specfem_par
+ implicit none
+
+ integer :: sizeprocs
+ integer :: NPROCTOT,NCHUNKS,NPROC_XI,NPROC_ETA
+ integer, dimension(MAX_NUM_REGIONS) :: NSPEC_computed
+
+ logical :: TRANSVERSE_ISOTROPY,ANISOTROPIC_3D_MANTLE,ANISOTROPIC_INNER_CORE,OCEANS, &
+ ATTENUATION,ATTENUATION_3D,ROTATION,ELLIPTICITY,GRAVITY
+
+
! check that the code is running with the requested nb of processes
if(sizeprocs /= NPROCTOT) call exit_MPI(myrank,'wrong number of MPI processes(initialization specfem)')
@@ -336,71 +301,71 @@
endif
if (NSPEC_computed(IREGION_OUTER_CORE) /= NSPEC_OUTER_CORE) then
write(IMAIN,*) 'NSPEC_OUTER_CORE:',NSPEC_computed(IREGION_OUTER_CORE),NSPEC_OUTER_CORE
- call exit_MPI(myrank,'error in compiled parameters, please recompile solver 2')
+ call exit_MPI(myrank,'error in compiled parameters, please recompile solver 2')
endif
if (NSPEC_computed(IREGION_INNER_CORE) /= NSPEC_INNER_CORE) then
write(IMAIN,*) 'NSPEC_INNER_CORE:',NSPEC_computed(IREGION_INNER_CORE),NSPEC_INNER_CORE
- call exit_MPI(myrank,'error in compiled parameters, please recompile solver 3')
+ call exit_MPI(myrank,'error in compiled parameters, please recompile solver 3')
endif
if (ATTENUATION_3D .NEQV. ATTENUATION_3D_VAL) then
write(IMAIN,*) 'ATTENUATION_3D:',ATTENUATION_3D,ATTENUATION_3D_VAL
- call exit_MPI(myrank,'error in compiled parameters, please recompile solver 4')
+ call exit_MPI(myrank,'error in compiled parameters ATTENUATION_3D, please recompile solver')
endif
if (NCHUNKS /= NCHUNKS_VAL) then
write(IMAIN,*) 'NCHUNKS:',NCHUNKS,NCHUNKS_VAL
- call exit_MPI(myrank,'error in compiled parameters, please recompile solver 6')
+ call exit_MPI(myrank,'error in compiled parameters NCHUNKS, please recompile solver')
endif
if (GRAVITY .NEQV. GRAVITY_VAL) then
write(IMAIN,*) 'GRAVITY:',GRAVITY,GRAVITY_VAL
- call exit_MPI(myrank,'error in compiled parameters, please recompile solver 7')
+ call exit_MPI(myrank,'error in compiled parameters GRAVITY, please recompile solver')
endif
if (ROTATION .NEQV. ROTATION_VAL) then
write(IMAIN,*) 'ROTATION:',ROTATION,ROTATION_VAL
- call exit_MPI(myrank,'error in compiled parameters, please recompile solver 8')
+ call exit_MPI(myrank,'error in compiled parameters ROTATION, please recompile solver')
endif
if (ATTENUATION .NEQV. ATTENUATION_VAL) then
write(IMAIN,*) 'ATTENUATION:',ATTENUATION,ATTENUATION_VAL
- call exit_MPI(myrank,'error in compiled parameters, please recompile solver 9')
+ call exit_MPI(myrank,'error in compiled parameters ATTENUATION, please recompile solver')
endif
if (ELLIPTICITY .NEQV. ELLIPTICITY_VAL) then
write(IMAIN,*) 'ELLIPTICITY:',ELLIPTICITY,ELLIPTICITY_VAL
- call exit_MPI(myrank,'error in compiled parameters, please recompile solver 10')
+ call exit_MPI(myrank,'error in compiled parameters ELLIPTICITY, please recompile solver')
endif
if (OCEANS .NEQV. OCEANS_VAL) then
write(IMAIN,*) 'OCEANS:',OCEANS,OCEANS_VAL
- call exit_MPI(myrank,'error in compiled parameters, please recompile solver 10')
+ call exit_MPI(myrank,'error in compiled parameters OCEANS, please recompile solver')
endif
- if (NPROCTOT /= NPROCTOT_VAL) then
- write(IMAIN,*) 'NPROCTOT:',NPROCTOT,NPROCTOT_VAL
- call exit_MPI(myrank,'error in compiled parameters, please recompile solver 11')
- endif
if (NPROC_XI /= NPROC_XI_VAL) then
write(IMAIN,*) 'NPROC_XI:',NPROC_XI,NPROC_XI_VAL
- call exit_MPI(myrank,'error in compiled parameters, please recompile solver 11')
+ call exit_MPI(myrank,'error in compiled parameters NPROC_XI, please recompile solver')
endif
if (NPROC_ETA /= NPROC_ETA_VAL) then
write(IMAIN,*) 'NPROC_ETA:',NPROC_ETA,NPROC_ETA_VAL
- call exit_MPI(myrank,'error in compiled parameters, please recompile solver 11')
+ call exit_MPI(myrank,'error in compiled parameters NPROC_ETA, please recompile solver')
endif
+ if (NPROCTOT /= NPROCTOT_VAL) then
+ write(IMAIN,*) 'NPROCTOT:',NPROCTOT,NPROCTOT_VAL
+ call exit_MPI(myrank,'error in compiled parameters NPROCTOT, please recompile solver')
+ endif
if (NEX_XI /= NEX_XI_VAL) then
write(IMAIN,*) 'NEX_XI:',NEX_XI,NEX_XI_VAL
- call exit_MPI(myrank,'error in compiled parameters, please recompile solver 12')
+ call exit_MPI(myrank,'error in compiled parameters NEX_XI, please recompile solver')
endif
if (NEX_ETA /= NEX_ETA_VAL) then
write(IMAIN,*) 'NEX_ETA:',NEX_ETA,NEX_ETA_VAL
- call exit_MPI(myrank,'error in compiled parameters, please recompile solver 13')
+ call exit_MPI(myrank,'error in compiled parameters NEX_ETA, please recompile solver')
endif
if (TRANSVERSE_ISOTROPY .NEQV. TRANSVERSE_ISOTROPY_VAL) then
write(IMAIN,*) 'TRANSVERSE_ISOTROPY:',TRANSVERSE_ISOTROPY,TRANSVERSE_ISOTROPY_VAL
- call exit_MPI(myrank,'error in compiled parameters, please recompile solver 14')
+ call exit_MPI(myrank,'error in compiled parameters, please recompile solver 14')
endif
if (ANISOTROPIC_3D_MANTLE .NEQV. ANISOTROPIC_3D_MANTLE_VAL) then
write(IMAIN,*) 'ANISOTROPIC_3D_MANTLE:',ANISOTROPIC_3D_MANTLE,ANISOTROPIC_3D_MANTLE_VAL
- call exit_MPI(myrank,'error in compiled parameters, please recompile solver 15')
+ call exit_MPI(myrank,'error in compiled parameters, please recompile solver 15')
endif
if (ANISOTROPIC_INNER_CORE .NEQV. ANISOTROPIC_INNER_CORE_VAL) then
write(IMAIN,*) 'ANISOTROPIC_INNER_CORE:',ANISOTROPIC_INNER_CORE,ANISOTROPIC_INNER_CORE_VAL
- call exit_MPI(myrank,'error in compiled parameters, please recompile solver 16')
+ call exit_MPI(myrank,'error in compiled parameters, please recompile solver 16')
endif
! check simulation parameters
@@ -464,57 +429,21 @@
endif
endif
- ! make ellipticity
- if(ELLIPTICITY_VAL) call make_ellipticity(nspl,rspl,espl,espl2,ONE_CRUST)
-
- ! read topography and bathymetry file
- if(myrank == 0 .and. (TOPOGRAPHY .or. OCEANS_VAL)) call read_topo_bathy_file(ibathy_topo)
- ! broadcast the information read on the master to the nodes
- call MPI_BCAST(ibathy_topo,NX_BATHY*NY_BATHY,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
-
- ! set up GLL points, weights and derivation matrices
- call define_derivation_matrices(xigll,yigll,zigll,wxgll,wygll,wzgll, &
- hprime_xx,hprime_yy,hprime_zz, &
- hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube)
-
- if( USE_DEVILLE_PRODUCTS_VAL ) then
-
- ! check that optimized routines from Deville et al. (2002) can be used
- if(NGLLX /= 5 .or. NGLLY /= 5 .or. NGLLZ /= 5) &
- stop 'Deville et al. (2002) routines can only be used if NGLLX = NGLLY = NGLLZ = 5'
-
- ! define transpose of derivation matrix
- do j = 1,NGLLY
- do i = 1,NGLLX
- hprime_xxT(j,i) = hprime_xx(i,j)
- hprimewgll_xxT(j,i) = hprimewgll_xx(i,j)
- enddo
- enddo
+ ! check that buffer size is valid when defining/using buffer_send_chunkcorn_scalar
+ if( NGLOB1D_RADIAL_CM < NGLOB1D_RADIAL_OC .or. NGLOB1D_RADIAL_CM < NGLOB1D_RADIAL_IC ) then
+ call exit_MPI(myrank, 'NGLOB1D_RADIAL_CM must be larger than for outer core or inner core, please check mesh')
endif
- ! counts receiver stations
- if (SIMULATION_TYPE == 1) then
- rec_filename = 'DATA/STATIONS'
- else
- rec_filename = 'DATA/STATIONS_ADJOINT'
+ ! check for GPU runs
+ if( GPU_MODE ) then
+ if( NGLLX /= 5 .or. NGLLY /= 5 .or. NGLLZ /= 5 ) &
+ call exit_mpi(myrank,'GPU mode can only be used if NGLLX == NGLLY == NGLLZ == 5')
+ if( CUSTOM_REAL /= 4 ) &
+ call exit_mpi(myrank,'GPU mode runs only with CUSTOM_REAL == 4')
+ if( ATTENUATION_VAL ) then
+ if( N_SLS /= 3 ) &
+ call exit_mpi(myrank,'GPU mode does not support N_SLS /= 3 yet')
+ endif
endif
- call get_value_string(STATIONS, 'solver.STATIONS', rec_filename)
- ! get total number of receivers
- if(myrank == 0) then
- open(unit=IIN,file=STATIONS,iostat=ios,status='old',action='read')
- nrec = 0
- do while(ios == 0)
- read(IIN,"(a)",iostat=ios) dummystring
- if(ios == 0) nrec = nrec + 1
- enddo
- close(IIN)
- endif
- ! broadcast the information read on the master to the nodes
- call MPI_BCAST(nrec,1,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
- if(nrec < 1) call exit_MPI(myrank,trim(STATIONS)//': need at least one receiver')
-
- end subroutine initialize_simulation
-
-
+ end subroutine initialize_simulation_check
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/iterate_time.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/iterate_time.F90 (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/iterate_time.F90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -0,0 +1,484 @@
+!=====================================================================
+!
+! S p e c f e m 3 D G l o b e V e r s i o n 5 . 1
+! --------------------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA
+! and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+ subroutine iterate_time()
+
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
+ use specfem_par_movie
+ implicit none
+
+ include 'mpif.h'
+
+!
+! s t a r t t i m e i t e r a t i o n s
+!
+
+ ! synchronize all processes to make sure everybody is ready to start time loop
+ call sync_all()
+
+ if(myrank == 0) then
+ write(IMAIN,*)
+ write(IMAIN,*) 'Starting time iteration loop...'
+ write(IMAIN,*)
+ endif
+
+ ! create an empty file to monitor the start of the simulation
+ if(myrank == 0) then
+ open(unit=IOUT,file=trim(OUTPUT_FILES)//'/starttimeloop.txt',status='unknown',action='write')
+ write(IOUT,*) 'hello, starting time loop'
+ close(IOUT)
+ endif
+
+ ! initialize variables for writing seismograms
+ seismo_offset = it_begin-1
+ seismo_current = 0
+
+#ifdef _HANDOPT
+ imodulo_NGLOB_CRUST_MANTLE = mod(NGLOB_CRUST_MANTLE,3)
+ imodulo_NGLOB_CRUST_MANTLE4 = mod(NGLOB_CRUST_MANTLE,4)
+ imodulo_NGLOB_INNER_CORE = mod(NGLOB_INNER_CORE,3)
+ imodulo_NGLOB_OUTER_CORE = mod(NGLOB_OUTER_CORE,3)
+#endif
+
+! get MPI starting time
+ time_start = MPI_WTIME()
+
+! *********************************************************
+! ************* MAIN LOOP OVER THE TIME STEPS *************
+! *********************************************************
+
+ do it = it_begin,it_end
+
+ ! simulation status output and stability check
+ if(mod(it,NTSTEP_BETWEEN_OUTPUT_INFO) == 0 .or. it == 5 .or. it == NSTEP) then
+ call it_check_stability()
+ endif
+
+ ! update displacement using Newmark time scheme
+ call it_update_displacement_scheme()
+
+ ! acoustic solver for outer core
+ ! (needs to be done first, before elastic one)
+ call compute_forces_acoustic()
+
+ ! elastic solver for crust/mantle and inner core
+ call compute_forces_elastic()
+
+ ! restores last time snapshot saved for backward/reconstruction of wavefields
+ ! note: this is done here after the Newmark time scheme, otherwise the indexing for sources
+ ! and adjoint sources will become more complicated
+ ! that is, index it for adjoint sources will match index NSTEP - 1 for backward/reconstructed wavefields
+ if( SIMULATION_TYPE == 3 .and. it == 1 ) then
+ call read_forward_arrays()
+ endif
+
+ ! write the seismograms with time shift
+ call write_seismograms()
+
+ ! adjoint simulations: kernels
+ if( SIMULATION_TYPE == 3 ) then
+ call compute_kernels()
+ endif
+
+ ! outputs movie files
+ if( MOVIE_SURFACE .or. MOVIE_VOLUME ) then
+ call write_movie_output()
+ endif
+
+ ! first step of noise tomography, i.e., save a surface movie at every time step
+ ! modified from the subroutine 'write_movie_surface'
+ if ( NOISE_TOMOGRAPHY == 1 ) then
+ call noise_save_surface_movie(displ_crust_mantle, &
+ ibelm_top_crust_mantle,ibool_crust_mantle, &
+ NSPEC2D_TOP(IREGION_CRUST_MANTLE),noise_surface_movie,it)
+ endif
+
+ enddo ! end of main time loop
+
+!
+!---- end of time iteration loop
+!
+
+ ! Transfer fields from GPU card to host for further analysis
+ if(GPU_MODE) call it_transfer_from_GPU()
+
+ end subroutine iterate_time
+
+!=====================================================================
+
+ subroutine it_update_displacement_scheme()
+
+! explicit Newmark time scheme with acoustic & elastic domains:
+! (see e.g. Hughes, 1987; Chaljub et al., 2003)
+!
+! chi(t+delta_t) = chi(t) + delta_t chi_dot(t) + 1/2 delta_t**2 chi_dot_dot(t)
+! chi_dot(t+delta_t) = chi_dot(t) + 1/2 delta_t chi_dot_dot(t) + 1/2 delta_t chi_dot_dot(t+delta_t)
+! chi_dot_dot(t+delta_t) = 1/M_acoustic( -K_acoustic chi(t+delta) + B_acoustic u(t+delta_t) + f(t+delta_t) )
+!
+! u(t+delta_t) = u(t) + delta_t v(t) + 1/2 delta_t**2 a(t)
+! v(t+delta_t) = v(t) + 1/2 delta_t a(t) + 1/2 delta_t a(t+delta_t)
+! a(t+delta_t) = 1/M_elastic ( -K_elastic u(t+delta) + B_elastic chi_dot_dot(t+delta_t) + f( t+delta_t) )
+!
+! where
+! chi, chi_dot, chi_dot_dot are acoustic (fluid) potentials ( dotted with respect to time)
+! u, v, a are displacement,velocity & acceleration
+! M is mass matrix, K stiffness matrix and B boundary term for acoustic/elastic domains
+! f denotes a source term (acoustic/elastic)
+!
+! note that this stage calculates the predictor terms
+!
+! for
+! potential chi_dot(t+delta) requires + 1/2 delta_t chi_dot_dot(t+delta_t)
+! at a later stage (corrector) once chi_dot_dot(t+delta) has been calculated
+! and similarly,
+! velocity v(t+delta_t) requires + 1/2 delta_t a(t+delta_t)
+! at a later stage once a(t+delta) has been calculated
+! also:
+! boundary term B_elastic requires chi_dot_dot(t+delta)
+! thus chi_dot_dot has to be updated first before the elastic boundary term is considered
+
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
+ use specfem_par_movie
+ implicit none
+
+ ! local parameters
+ integer :: i
+
+ ! Newmark time scheme update
+#ifdef _HANDOPT_NEWMARK
+! way 2:
+! One common technique in computational science to help enhance pipelining is loop unrolling
+!
+! we're accessing NDIM=3 components at each line,
+! that is, for an iteration, the register must contain
+! NDIM * displ_ + NDIM * veloc_ + NDIM * accel + deltat + deltatsq..
+! in most cases a real (CUSTOM_REAL) value will have 4 bytes,
+! assuming a default cache size of about 128 bytes, we unroll here in steps of 3, thus 29 reals or 116 bytes,
+! rather than with steps of 4
+ ! mantle
+ if(imodulo_NGLOB_CRUST_MANTLE >= 1) then
+ do i = 1,imodulo_NGLOB_CRUST_MANTLE
+ displ_crust_mantle(:,i) = displ_crust_mantle(:,i) &
+ + deltat*veloc_crust_mantle(:,i) + deltatsqover2*accel_crust_mantle(:,i)
+
+ veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) &
+ + deltatover2*accel_crust_mantle(:,i)
+
+ accel_crust_mantle(:,i) = 0._CUSTOM_REAL
+ enddo
+ endif
+ do i = imodulo_NGLOB_CRUST_MANTLE+1,NGLOB_CRUST_MANTLE, 3 ! in steps of 3
+ displ_crust_mantle(:,i) = displ_crust_mantle(:,i) &
+ + deltat*veloc_crust_mantle(:,i) + deltatsqover2*accel_crust_mantle(:,i)
+ displ_crust_mantle(:,i+1) = displ_crust_mantle(:,i+1) &
+ + deltat*veloc_crust_mantle(:,i+1) + deltatsqover2*accel_crust_mantle(:,i+1)
+ displ_crust_mantle(:,i+2) = displ_crust_mantle(:,i+2) &
+ + deltat*veloc_crust_mantle(:,i+2) + deltatsqover2*accel_crust_mantle(:,i+2)
+
+
+ veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) &
+ + deltatover2*accel_crust_mantle(:,i)
+ veloc_crust_mantle(:,i+1) = veloc_crust_mantle(:,i+1) &
+ + deltatover2*accel_crust_mantle(:,i+1)
+ veloc_crust_mantle(:,i+2) = veloc_crust_mantle(:,i+2) &
+ + deltatover2*accel_crust_mantle(:,i+2)
+
+ ! set acceleration to zero
+ ! note: we do initialize acceleration in this loop since it is read already into the cache,
+ ! otherwise it would have to be read in again for this explicitly,
+ ! which would make this step more expensive
+ accel_crust_mantle(:,i) = 0._CUSTOM_REAL
+ accel_crust_mantle(:,i+1) = 0._CUSTOM_REAL
+ accel_crust_mantle(:,i+2) = 0._CUSTOM_REAL
+ enddo
+
+ ! outer core
+ do i=1,NGLOB_OUTER_CORE
+ displ_outer_core(i) = displ_outer_core(i) &
+ + deltat*veloc_outer_core(i) + deltatsqover2*accel_outer_core(i)
+
+ veloc_outer_core(i) = veloc_outer_core(i) &
+ + deltatover2*accel_outer_core(i)
+
+ accel_outer_core(i) = 0._CUSTOM_REAL
+ enddo
+
+ ! inner core
+ if(imodulo_NGLOB_INNER_CORE >= 1) then
+ do i = 1,imodulo_NGLOB_INNER_CORE
+ displ_inner_core(:,i) = displ_inner_core(:,i) &
+ + deltat*veloc_inner_core(:,i) + deltatsqover2*accel_inner_core(:,i)
+
+ veloc_inner_core(:,i) = veloc_inner_core(:,i) &
+ + deltatover2*accel_inner_core(:,i)
+
+ accel_inner_core(:,i) = 0._CUSTOM_REAL
+ enddo
+ endif
+ do i = imodulo_NGLOB_INNER_CORE+1,NGLOB_INNER_CORE, 3 ! in steps of 3
+ displ_inner_core(:,i) = displ_inner_core(:,i) &
+ + deltat*veloc_inner_core(:,i) + deltatsqover2*accel_inner_core(:,i)
+ displ_inner_core(:,i+1) = displ_inner_core(:,i+1) &
+ + deltat*veloc_inner_core(:,i+1) + deltatsqover2*accel_inner_core(:,i+1)
+ displ_inner_core(:,i+2) = displ_inner_core(:,i+2) &
+ + deltat*veloc_inner_core(:,i+2) + deltatsqover2*accel_inner_core(:,i+2)
+
+
+ veloc_inner_core(:,i) = veloc_inner_core(:,i) &
+ + deltatover2*accel_inner_core(:,i)
+ veloc_inner_core(:,i+1) = veloc_inner_core(:,i+1) &
+ + deltatover2*accel_inner_core(:,i+1)
+ veloc_inner_core(:,i+2) = veloc_inner_core(:,i+2) &
+ + deltatover2*accel_inner_core(:,i+2)
+
+ accel_inner_core(:,i) = 0._CUSTOM_REAL
+ accel_inner_core(:,i+1) = 0._CUSTOM_REAL
+ accel_inner_core(:,i+2) = 0._CUSTOM_REAL
+ enddo
+
+#else
+! way 1:
+ ! mantle
+ do i=1,NGLOB_CRUST_MANTLE
+ displ_crust_mantle(:,i) = displ_crust_mantle(:,i) &
+ + deltat*veloc_crust_mantle(:,i) + deltatsqover2*accel_crust_mantle(:,i)
+ veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) &
+ + deltatover2*accel_crust_mantle(:,i)
+ accel_crust_mantle(:,i) = 0._CUSTOM_REAL
+ enddo
+ ! outer core
+ do i=1,NGLOB_OUTER_CORE
+ displ_outer_core(i) = displ_outer_core(i) &
+ + deltat*veloc_outer_core(i) + deltatsqover2*accel_outer_core(i)
+ veloc_outer_core(i) = veloc_outer_core(i) &
+ + deltatover2*accel_outer_core(i)
+ accel_outer_core(i) = 0._CUSTOM_REAL
+ enddo
+ ! inner core
+ do i=1,NGLOB_INNER_CORE
+ displ_inner_core(:,i) = displ_inner_core(:,i) &
+ + deltat*veloc_inner_core(:,i) + deltatsqover2*accel_inner_core(:,i)
+ veloc_inner_core(:,i) = veloc_inner_core(:,i) &
+ + deltatover2*accel_inner_core(:,i)
+ accel_inner_core(:,i) = 0._CUSTOM_REAL
+ enddo
+#endif
+
+
+
+
+ ! backward field
+ if (SIMULATION_TYPE == 3) then
+
+#ifdef _HANDOPT_NEWMARK
+! way 2:
+ ! mantle
+ if(imodulo_NGLOB_CRUST_MANTLE >= 1) then
+ do i=1,imodulo_NGLOB_CRUST_MANTLE
+ b_displ_crust_mantle(:,i) = b_displ_crust_mantle(:,i) &
+ + b_deltat*b_veloc_crust_mantle(:,i) + b_deltatsqover2*b_accel_crust_mantle(:,i)
+ b_veloc_crust_mantle(:,i) = b_veloc_crust_mantle(:,i) &
+ + b_deltatover2*b_accel_crust_mantle(:,i)
+ b_accel_crust_mantle(:,i) = 0._CUSTOM_REAL
+ enddo
+ endif
+ do i=imodulo_NGLOB_CRUST_MANTLE+1,NGLOB_CRUST_MANTLE,3
+ b_displ_crust_mantle(:,i) = b_displ_crust_mantle(:,i) &
+ + b_deltat*b_veloc_crust_mantle(:,i) + b_deltatsqover2*b_accel_crust_mantle(:,i)
+ b_displ_crust_mantle(:,i+1) = b_displ_crust_mantle(:,i+1) &
+ + b_deltat*b_veloc_crust_mantle(:,i+1) + b_deltatsqover2*b_accel_crust_mantle(:,i+1)
+ b_displ_crust_mantle(:,i+2) = b_displ_crust_mantle(:,i+2) &
+ + b_deltat*b_veloc_crust_mantle(:,i+2) + b_deltatsqover2*b_accel_crust_mantle(:,i+2)
+
+
+ b_veloc_crust_mantle(:,i) = b_veloc_crust_mantle(:,i) &
+ + b_deltatover2*b_accel_crust_mantle(:,i)
+ b_veloc_crust_mantle(:,i+1) = b_veloc_crust_mantle(:,i+1) &
+ + b_deltatover2*b_accel_crust_mantle(:,i+1)
+ b_veloc_crust_mantle(:,i+2) = b_veloc_crust_mantle(:,i+2) &
+ + b_deltatover2*b_accel_crust_mantle(:,i+2)
+
+ b_accel_crust_mantle(:,i) = 0._CUSTOM_REAL
+ b_accel_crust_mantle(:,i+1) = 0._CUSTOM_REAL
+ b_accel_crust_mantle(:,i+2) = 0._CUSTOM_REAL
+ enddo
+
+ ! outer core
+ do i=1,NGLOB_OUTER_CORE
+ b_displ_outer_core(i) = b_displ_outer_core(i) &
+ + b_deltat*b_veloc_outer_core(i) + b_deltatsqover2*b_accel_outer_core(i)
+ b_veloc_outer_core(i) = b_veloc_outer_core(i) &
+ + b_deltatover2*b_accel_outer_core(i)
+ b_accel_outer_core(i) = 0._CUSTOM_REAL
+ enddo
+
+ ! inner core
+ if(imodulo_NGLOB_INNER_CORE >= 1) then
+ do i=1,imodulo_NGLOB_INNER_CORE
+ b_displ_inner_core(:,i) = b_displ_inner_core(:,i) &
+ + b_deltat*b_veloc_inner_core(:,i) + b_deltatsqover2*b_accel_inner_core(:,i)
+ b_veloc_inner_core(:,i) = b_veloc_inner_core(:,i) &
+ + b_deltatover2*b_accel_inner_core(:,i)
+ b_accel_inner_core(:,i) = 0._CUSTOM_REAL
+ enddo
+ endif
+ do i=imodulo_NGLOB_INNER_CORE+1,NGLOB_INNER_CORE,3
+ b_displ_inner_core(:,i) = b_displ_inner_core(:,i) &
+ + b_deltat*b_veloc_inner_core(:,i) + b_deltatsqover2*b_accel_inner_core(:,i)
+ b_displ_inner_core(:,i+1) = b_displ_inner_core(:,i+1) &
+ + b_deltat*b_veloc_inner_core(:,i+1) + b_deltatsqover2*b_accel_inner_core(:,i+1)
+ b_displ_inner_core(:,i+2) = b_displ_inner_core(:,i+2) &
+ + b_deltat*b_veloc_inner_core(:,i+2) + b_deltatsqover2*b_accel_inner_core(:,i+2)
+
+ b_veloc_inner_core(:,i) = b_veloc_inner_core(:,i) &
+ + b_deltatover2*b_accel_inner_core(:,i)
+ b_veloc_inner_core(:,i+1) = b_veloc_inner_core(:,i+1) &
+ + b_deltatover2*b_accel_inner_core(:,i+1)
+ b_veloc_inner_core(:,i+2) = b_veloc_inner_core(:,i+2) &
+ + b_deltatover2*b_accel_inner_core(:,i+2)
+
+ b_accel_inner_core(:,i) = 0._CUSTOM_REAL
+ b_accel_inner_core(:,i+1) = 0._CUSTOM_REAL
+ b_accel_inner_core(:,i+2) = 0._CUSTOM_REAL
+ enddo
+#else
+! way 1:
+ ! mantle
+ do i=1,NGLOB_CRUST_MANTLE
+ b_displ_crust_mantle(:,i) = b_displ_crust_mantle(:,i) &
+ + b_deltat*b_veloc_crust_mantle(:,i) + b_deltatsqover2*b_accel_crust_mantle(:,i)
+ b_veloc_crust_mantle(:,i) = b_veloc_crust_mantle(:,i) &
+ + b_deltatover2*b_accel_crust_mantle(:,i)
+ b_accel_crust_mantle(:,i) = 0._CUSTOM_REAL
+ enddo
+ ! outer core
+ do i=1,NGLOB_OUTER_CORE
+ b_displ_outer_core(i) = b_displ_outer_core(i) &
+ + b_deltat*b_veloc_outer_core(i) + b_deltatsqover2*b_accel_outer_core(i)
+ b_veloc_outer_core(i) = b_veloc_outer_core(i) &
+ + b_deltatover2*b_accel_outer_core(i)
+ b_accel_outer_core(i) = 0._CUSTOM_REAL
+ enddo
+ ! inner core
+ do i=1,NGLOB_INNER_CORE
+ b_displ_inner_core(:,i) = b_displ_inner_core(:,i) &
+ + b_deltat*b_veloc_inner_core(:,i) + b_deltatsqover2*b_accel_inner_core(:,i)
+ b_veloc_inner_core(:,i) = b_veloc_inner_core(:,i) &
+ + b_deltatover2*b_accel_inner_core(:,i)
+ b_accel_inner_core(:,i) = 0._CUSTOM_REAL
+ enddo
+#endif
+ endif ! SIMULATION_TYPE == 3
+
+ ! integral of strain for adjoint movie volume
+ if(MOVIE_VOLUME .and. (MOVIE_VOLUME_TYPE == 2 .or. MOVIE_VOLUME_TYPE == 3) ) then
+! Iepsilondev_crust_mantle(:,:,:,:,:) = Iepsilondev_crust_mantle(:,:,:,:,:) &
+! + deltat*epsilondev_crust_mantle(:,:,:,:,:)
+ Iepsilondev_crust_mantle(1,:,:,:,:) = Iepsilondev_crust_mantle(1,:,:,:,:) &
+ + deltat*epsilondev_xx_crust_mantle(:,:,:,:)
+ Iepsilondev_crust_mantle(2,:,:,:,:) = Iepsilondev_crust_mantle(2,:,:,:,:) &
+ + deltat*epsilondev_yy_crust_mantle(:,:,:,:)
+ Iepsilondev_crust_mantle(3,:,:,:,:) = Iepsilondev_crust_mantle(3,:,:,:,:) &
+ + deltat*epsilondev_xy_crust_mantle(:,:,:,:)
+ Iepsilondev_crust_mantle(4,:,:,:,:) = Iepsilondev_crust_mantle(4,:,:,:,:) &
+ + deltat*epsilondev_xz_crust_mantle(:,:,:,:)
+ Iepsilondev_crust_mantle(5,:,:,:,:) = Iepsilondev_crust_mantle(5,:,:,:,:) &
+ + deltat*epsilondev_yz_crust_mantle(:,:,:,:)
+
+ Ieps_trace_over_3_crust_mantle(:,:,:,:) = Ieps_trace_over_3_crust_mantle(:,:,:,:) &
+ + deltat*eps_trace_over_3_crust_mantle(:,:,:,:)
+ endif
+
+ end subroutine it_update_displacement_scheme
+
+
+!=====================================================================
+
+ subroutine it_check_stability()
+
+! computes the maximum of the norm of the displacement
+! in all the slices using an MPI reduction
+! and output timestamp file to check that simulation is running fine
+
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
+ use specfem_par_movie
+ implicit none
+
+ ! compute the maximum of the norm of the displacement
+ ! in all the slices using an MPI reduction
+ ! and output timestamp file to check that simulation is running fine
+ call check_simulation_stability(it,displ_crust_mantle,displ_inner_core,displ_outer_core, &
+ b_displ_crust_mantle,b_displ_inner_core,b_displ_outer_core, &
+ eps_trace_over_3_crust_mantle, &
+ epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
+ epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle, &
+ SIMULATION_TYPE,OUTPUT_FILES,time_start,DT,t0,NSTEP, &
+ myrank)
+
+ ! daniel: debugging
+ !if( maxval(displ_crust_mantle(1,:)**2 + &
+ ! displ_crust_mantle(2,:)**2 + displ_crust_mantle(3,:)**2) > 1.e4 ) then
+ ! print*,'slice',myrank
+ ! print*,' crust_mantle displ:', maxval(displ_crust_mantle(1,:)), &
+ ! maxval(displ_crust_mantle(2,:)),maxval(displ_crust_mantle(3,:))
+ ! print*,' indxs: ',maxloc( displ_crust_mantle(1,:)),maxloc( displ_crust_mantle(2,:)),maxloc( displ_crust_mantle(3,:))
+ ! indx = maxloc( displ_crust_mantle(3,:) )
+ ! rval = xstore_crust_mantle(indx(1))
+ ! thetaval = ystore_crust_mantle(indx(1))
+ ! phival = zstore_crust_mantle(indx(1))
+ ! !thetaval = PI/2.0d0-datan(1.006760466d0*dcos(dble(thetaval))/dmax1(TINYVAL,dsin(dble(thetaval))))
+ ! print*,'r/lat/lon:',rval*R_EARTH_KM,90.0-thetaval*180./PI,phival*180./PI
+ ! call rthetaphi_2_xyz(rval,thetaval,phival,xstore_crust_mantle(indx(1)),&
+ ! ystore_crust_mantle(indx(1)),zstore_crust_mantle(indx(1)))
+ ! print*,'x/y/z:',rval,thetaval,phival
+ ! call exit_MPI(myrank,'error stability')
+ !endif
+
+ end subroutine it_check_stability
+
+!=====================================================================
+
+ subroutine it_transfer_from_GPU()
+
+! transfers fields on GPU back onto CPU
+
+ use specfem_par
+ implicit none
+
+ ! frees allocated memory on GPU
+ call prepare_cleanup_device(Mesh_pointer)
+
+ end subroutine it_transfer_from_GPU
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_receivers.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_receivers.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_receivers.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -703,8 +703,7 @@
! main process broadcasts the results to all the slices
call MPI_BCAST(nrec,1,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
- call MPI_BARRIER(MPI_COMM_WORLD,ier)
-
+
call MPI_BCAST(islice_selected_rec,nrec,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
call MPI_BCAST(ispec_selected_rec,nrec,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
call MPI_BCAST(xi_receiver,nrec,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ier)
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -25,94 +25,194 @@
!
!=====================================================================
- subroutine prepare_timerun_rmass(myrank,rmass_ocean_load,rmass_crust_mantle, &
- rmass_outer_core,rmass_inner_core, &
- iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle, &
- iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_outer_core,iboolright_xi_outer_core, &
- iboolleft_eta_outer_core,iboolright_eta_outer_core, &
- npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
- iboolfaces_outer_core,iboolcorner_outer_core, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core, &
- iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces,imsg_type, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces_scalar,buffer_received_faces_scalar, &
- buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
- NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
- NGLOB1D_RADIAL,NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX,NGLOB2DMAX_XY,npoin2D_max_all_CM_IC)
+ subroutine prepare_timerun()
+ use specfem_par
+ use specfem_par_movie
implicit none
-
+
include 'mpif.h'
- include "constants.h"
- include "OUTPUT_FILES/values_from_mesher.h"
- integer myrank,npoin2D_max_all_CM_IC
+ ! get MPI starting time
+ time_start = MPI_WTIME()
+
+ ! user output infos
+ call prepare_timerun_user_output()
- real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE_OCEANS) :: rmass_ocean_load
- real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE) :: rmass_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE) :: rmass_outer_core
- real(kind=CUSTOM_REAL), dimension(NGLOB_INNER_CORE) :: rmass_inner_core
+ ! sets up mass matrices
+ call prepare_timerun_mass_matrices()
- integer ichunk,iproc_xi,iproc_eta
- integer, dimension(NCHUNKS_VAL,0:NPROC_XI_VAL-1,0:NPROC_ETA_VAL-1) :: addressing
+ ! convert x/y/z into r/theta/phi spherical coordinates
+ call prepare_timerun_convert_coord()
+
+ ! allocate files to save movies
+ ! for noise tomography, store_val_x/y/z/ux/uy/uz needed for 'surface movie'
+ if(MOVIE_SURFACE .or. NOISE_TOMOGRAPHY /= 0 ) then
+ call prepare_timerun_movie_surface()
+ endif
- ! 2-D addressing and buffers for summation between slices
- integer, dimension(NGLOB2DMAX_XMIN_XMAX_CM) :: iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle
- integer, dimension(NGLOB2DMAX_YMIN_YMAX_CM) :: iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle
+ ! output point and element information for 3D movies
+ if(MOVIE_VOLUME) call prepare_timerun_movie_volume()
- integer, dimension(NGLOB2DMAX_XMIN_XMAX_OC) :: iboolleft_xi_outer_core,iboolright_xi_outer_core
- integer, dimension(NGLOB2DMAX_YMIN_YMAX_OC) :: iboolleft_eta_outer_core,iboolright_eta_outer_core
- integer, dimension(NGLOB2DMAX_XMIN_XMAX_IC) :: iboolleft_xi_inner_core,iboolright_xi_inner_core
- integer, dimension(NGLOB2DMAX_YMIN_YMAX_IC) :: iboolleft_eta_inner_core,iboolright_eta_inner_core
+ ! sets up time increments and rotation constants
+ call prepare_timerun_constants()
- integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle
+ ! precomputes gravity factors
+ call prepare_timerun_gravity()
- integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_outer_core,npoin2D_eta_outer_core
- integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_inner_core,npoin2D_eta_inner_core
+ ! precomputes attenuation factors
+ if(ATTENUATION_VAL) call prepare_timerun_attenuation()
- integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED) :: iboolfaces_crust_mantle, &
- iboolfaces_outer_core,iboolfaces_inner_core
+ ! initializes arrays
+ call prepare_timerun_init_wavefield()
- integer npoin2D_faces_crust_mantle(NUMFACES_SHARED)
- integer npoin2D_faces_outer_core(NUMFACES_SHARED)
- integer npoin2D_faces_inner_core(NUMFACES_SHARED)
+ ! reads files back from local disk or MT tape system if restart file
+ ! note: for SIMULATION_TYPE 3 simulations, the stored wavefields
+ ! will be read in the time loop after the Newmark time scheme update.
+ ! this makes indexing and timing easier to match with adjoint wavefields indexing.
+ call read_forward_arrays_startrun()
- ! indirect addressing for each corner of the chunks
- integer, dimension(NGLOB1D_RADIAL_CM,NUMCORNERS_SHARED) :: iboolcorner_crust_mantle
- integer, dimension(NGLOB1D_RADIAL_OC,NUMCORNERS_SHARED) :: iboolcorner_outer_core
- integer, dimension(NGLOB1D_RADIAL_IC,NUMCORNERS_SHARED) :: iboolcorner_inner_core
+ ! prepares noise simulations
+ call prepare_timerun_noise()
- ! communication pattern for faces between chunks
- integer, dimension(NUMMSGS_FACES_VAL) :: iprocfrom_faces,iprocto_faces,imsg_type
- ! communication pattern for corners between chunks
- integer, dimension(NCORNERSCHUNKS_VAL) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
+ ! prepares GPU arrays
+ if(GPU_MODE) call prepare_timerun_GPU()
- ! buffers for send and receive between faces of the slices and the chunks
- real(kind=CUSTOM_REAL), dimension(NGLOB2DMAX_XY_VAL) :: &
- buffer_send_faces_scalar,buffer_received_faces_scalar
+ ! user output
+ if( myrank == 0 ) then
+ ! elapsed time since beginning of mesh generation
+ tCPU = MPI_WTIME() - time_start
+ write(IMAIN,*)
+ write(IMAIN,*) 'Elapsed time for preparing timerun in seconds = ',sngl(tCPU)
+ write(IMAIN,*)
+ endif
- ! buffers for send and receive between corners of the chunks
- real(kind=CUSTOM_REAL), dimension(NGLOB1D_RADIAL_CM) :: &
- buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar
+ end subroutine prepare_timerun
- integer NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS
+!
+!-------------------------------------------------------------------------------------------------
+!
- integer, dimension(MAX_NUM_REGIONS) :: NGLOB1D_RADIAL,NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX
- integer NGLOB2DMAX_XY
+ subroutine prepare_timerun_user_output()
+ use specfem_par
+ implicit none
+
+ ! user output
+ if(myrank == 0) then
+
+ write(IMAIN,*)
+ write(IMAIN,*) 'Reference radius of the Earth used is ',R_EARTH_KM,' km'
+ write(IMAIN,*)
+
+ write(IMAIN,*)
+ if(OCEANS_VAL) then
+ write(IMAIN,*) 'incorporating the oceans using equivalent load'
+ else
+ write(IMAIN,*) 'no oceans'
+ endif
+
+ write(IMAIN,*)
+ if(ELLIPTICITY_VAL) then
+ write(IMAIN,*) 'incorporating ellipticity'
+ else
+ write(IMAIN,*) 'no ellipticity'
+ endif
+
+ write(IMAIN,*)
+ if(TOPOGRAPHY) then
+ write(IMAIN,*) 'incorporating surface topography'
+ else
+ write(IMAIN,*) 'no surface topography'
+ endif
+
+ write(IMAIN,*)
+ if(GRAVITY_VAL) then
+ write(IMAIN,*) 'incorporating self-gravitation (Cowling approximation)'
+ else
+ write(IMAIN,*) 'no self-gravitation'
+ endif
+
+ write(IMAIN,*)
+ if(ROTATION_VAL) then
+ write(IMAIN,*) 'incorporating rotation'
+ else
+ write(IMAIN,*) 'no rotation'
+ endif
+
+ write(IMAIN,*)
+ if(ATTENUATION_VAL) then
+ write(IMAIN,*) 'incorporating attenuation using ',N_SLS,' standard linear solids'
+
+ if(ATTENUATION_3D_VAL) write(IMAIN,*) 'using 3D attenuation'
+
+ if(USE_ATTENUATION_MIMIC ) write(IMAIN,*) 'mimicking effects on velocity only'
+ else
+ write(IMAIN,*) 'no attenuation'
+ endif
+
+ write(IMAIN,*)
+ write(IMAIN,*)
+ write(IMAIN,*)
+
+ endif
+
+ end subroutine prepare_timerun_user_output
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine prepare_timerun_mass_matrices()
+
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
+ implicit none
+
+ ! mass matrices need to be assembled with MPI here once and for all
+ call prepare_timerun_rmass_assembly()
+
+ ! check that all the mass matrices are positive
+ if(OCEANS_VAL) then
+ if(minval(rmass_ocean_load) <= 0.) call exit_MPI(myrank,'negative mass matrix term for the oceans')
+ endif
+ if(minval(rmass_crust_mantle) <= 0.) call exit_MPI(myrank,'negative mass matrix term for the crust_mantle')
+ if(minval(rmass_inner_core) <= 0.) call exit_MPI(myrank,'negative mass matrix term for the inner core')
+ if(minval(rmass_outer_core) <= 0.) call exit_MPI(myrank,'negative mass matrix term for the outer core')
+
+ ! for efficiency, invert final mass matrix once and for all on each slice
+ if(OCEANS_VAL) rmass_ocean_load = 1._CUSTOM_REAL / rmass_ocean_load
+
+ rmass_crust_mantle = 1._CUSTOM_REAL / rmass_crust_mantle
+ rmass_outer_core = 1._CUSTOM_REAL / rmass_outer_core
+ rmass_inner_core = 1._CUSTOM_REAL / rmass_inner_core
+
+ end subroutine prepare_timerun_mass_matrices
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine prepare_timerun_rmass_assembly()
+
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
+ implicit none
+
! local parameters
- integer :: ier
+ integer :: ndim_assemble
+ ! temporary buffers for send and receive between faces of the slices and the chunks
+ real(kind=CUSTOM_REAL), dimension(npoin2D_max_all_CM_IC) :: &
+ buffer_send_faces_scalar,buffer_received_faces_scalar
+
! synchronize all the processes before assembling the mass matrix
! to make sure all the nodes have finished to read their databases
- call MPI_BARRIER(MPI_COMM_WORLD,ier)
+ call sync_all()
! the mass matrix needs to be assembled with MPI here once and for all
@@ -174,141 +274,208 @@
NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL(IREGION_INNER_CORE), &
NGLOB2DMAX_XMIN_XMAX(IREGION_INNER_CORE),NGLOB2DMAX_YMIN_YMAX(IREGION_INNER_CORE),NGLOB2DMAX_XY,NCHUNKS_VAL)
+
+ ! mass matrix including central cube
+ if(INCLUDE_CENTRAL_CUBE) then
+ ! the mass matrix to assemble is a scalar, not a vector
+ ndim_assemble = 1
+
+ ! use central cube buffers to assemble the inner core mass matrix with the central cube
+ call assemble_MPI_central_cube_block(ichunk,nb_msgs_theor_in_cube, sender_from_slices_to_cube, &
+ npoin2D_cube_from_slices, buffer_all_cube_from_slices, &
+ buffer_slices, buffer_slices2, ibool_central_cube, &
+ receiver_cube_from_slices, ibool_inner_core, &
+ idoubling_inner_core, NSPEC_INNER_CORE, &
+ ibelm_bottom_inner_core, NSPEC2D_BOTTOM(IREGION_INNER_CORE), &
+ NGLOB_INNER_CORE, &
+ rmass_inner_core,ndim_assemble)
+
+ ! suppress fictitious mass matrix elements in central cube
+ ! because the slices do not compute all their spectral elements in the cube
+ where(rmass_inner_core(:) <= 0.) rmass_inner_core = 1.
+ endif
+
if(myrank == 0) write(IMAIN,*) 'end assembling MPI mass matrix'
- end subroutine prepare_timerun_rmass
+ end subroutine prepare_timerun_rmass_assembly
!
!-------------------------------------------------------------------------------------------------
!
- subroutine prepare_timerun_centralcube(myrank,rmass_inner_core, &
- iproc_xi,iproc_eta,ichunk, &
- NSPEC2DMAX_XMIN_XMAX,NSPEC2DMAX_YMIN_YMAX,NSPEC2D_BOTTOM, &
- addressing,ibool_inner_core,idoubling_inner_core, &
- xstore_inner_core,ystore_inner_core,zstore_inner_core, &
- nspec2D_xmin_inner_core,nspec2D_xmax_inner_core, &
- nspec2D_ymin_inner_core,nspec2D_ymax_inner_core, &
- ibelm_xmin_inner_core,ibelm_xmax_inner_core, &
- ibelm_ymin_inner_core,ibelm_ymax_inner_core,ibelm_bottom_inner_core, &
- nb_msgs_theor_in_cube,non_zero_nb_msgs_theor_in_cube, &
- npoin2D_cube_from_slices,receiver_cube_from_slices, &
- sender_from_slices_to_cube,ibool_central_cube, &
- buffer_slices,buffer_slices2,buffer_all_cube_from_slices)
+ subroutine prepare_timerun_convert_coord()
+! converts x/y/z into r/theta/phi spherical coordinates
+
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
implicit none
- include 'mpif.h'
- include "constants.h"
- include "OUTPUT_FILES/values_from_mesher.h"
+ ! local parameters
+ integer :: i
+ real(kind=CUSTOM_REAL) :: rval,thetaval,phival
+
+ ! change x, y, z to r, theta and phi once and for all
+ ! IMPROVE dangerous: old name kept (xstore ystore zstore) for new values
- integer myrank
+ ! convert in the crust and mantle
+ do i = 1,NGLOB_CRUST_MANTLE
+ call xyz_2_rthetaphi(xstore_crust_mantle(i), &
+ ystore_crust_mantle(i), &
+ zstore_crust_mantle(i),rval,thetaval,phival)
+ xstore_crust_mantle(i) = rval
+ ystore_crust_mantle(i) = thetaval
+ zstore_crust_mantle(i) = phival
+ enddo
- real(kind=CUSTOM_REAL), dimension(NGLOB_INNER_CORE) :: rmass_inner_core
+ ! convert in the outer core
+ do i = 1,NGLOB_OUTER_CORE
+ call xyz_2_rthetaphi(xstore_outer_core(i), &
+ ystore_outer_core(i), &
+ zstore_outer_core(i),rval,thetaval,phival)
+ xstore_outer_core(i) = rval
+ ystore_outer_core(i) = thetaval
+ zstore_outer_core(i) = phival
+ enddo
- integer ichunk,iproc_xi,iproc_eta
+ ! convert in the inner core
+ do i = 1,NGLOB_INNER_CORE
+ call xyz_2_rthetaphi(xstore_inner_core(i), &
+ ystore_inner_core(i), &
+ zstore_inner_core(i),rval,thetaval,phival)
+ xstore_inner_core(i) = rval
+ ystore_inner_core(i) = thetaval
+ zstore_inner_core(i) = phival
+ enddo
- integer, dimension(MAX_NUM_REGIONS) :: NSPEC2DMAX_XMIN_XMAX,NSPEC2DMAX_YMIN_YMAX,NSPEC2D_BOTTOM
- integer, dimension(NCHUNKS_VAL,0:NPROC_XI_VAL-1,0:NPROC_ETA_VAL-1) :: addressing
+ end subroutine prepare_timerun_convert_coord
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: ibool_inner_core
- integer, dimension(NSPEC_INNER_CORE) :: idoubling_inner_core
- real(kind=CUSTOM_REAL), dimension(NGLOB_INNER_CORE) :: &
- xstore_inner_core,ystore_inner_core,zstore_inner_core
+!
+!-------------------------------------------------------------------------------------------------
+!
- integer nspec2D_xmin_inner_core,nspec2D_xmax_inner_core, &
- nspec2D_ymin_inner_core,nspec2D_ymax_inner_core
+ subroutine prepare_timerun_movie_surface()
- integer, dimension(NSPEC2DMAX_XMIN_XMAX_IC) :: ibelm_xmin_inner_core,ibelm_xmax_inner_core
- integer, dimension(NSPEC2DMAX_YMIN_YMAX_IC) :: ibelm_ymin_inner_core,ibelm_ymax_inner_core
- integer, dimension(NSPEC2D_BOTTOM_IC) :: ibelm_bottom_inner_core
-
- integer nb_msgs_theor_in_cube,non_zero_nb_msgs_theor_in_cube, &
- npoin2D_cube_from_slices,receiver_cube_from_slices
-
- integer, dimension(non_zero_nb_msgs_theor_in_cube) :: sender_from_slices_to_cube
- integer, dimension(non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices) :: ibool_central_cube
- double precision, dimension(npoin2D_cube_from_slices,NDIM) :: buffer_slices,buffer_slices2
- double precision, dimension(non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices,NDIM) :: &
- buffer_all_cube_from_slices
-
+ use specfem_par
+ use specfem_par_movie
+ implicit none
+
! local parameters
- integer :: ndim_assemble
+ integer :: ier
- ! create buffers to assemble with the central cube
- call create_central_cube_buffers(myrank,iproc_xi,iproc_eta,ichunk, &
- NPROC_XI_VAL,NPROC_ETA_VAL,NCHUNKS_VAL,NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
- NSPEC2DMAX_XMIN_XMAX(IREGION_INNER_CORE),NSPEC2DMAX_YMIN_YMAX(IREGION_INNER_CORE), &
- NSPEC2D_BOTTOM(IREGION_INNER_CORE), &
- addressing,ibool_inner_core,idoubling_inner_core, &
- xstore_inner_core,ystore_inner_core,zstore_inner_core, &
- nspec2D_xmin_inner_core,nspec2D_xmax_inner_core, &
- nspec2D_ymin_inner_core,nspec2D_ymax_inner_core, &
- ibelm_xmin_inner_core,ibelm_xmax_inner_core, &
- ibelm_ymin_inner_core,ibelm_ymax_inner_core,ibelm_bottom_inner_core, &
- nb_msgs_theor_in_cube,non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices, &
- receiver_cube_from_slices,sender_from_slices_to_cube,ibool_central_cube, &
- buffer_slices,buffer_slices2,buffer_all_cube_from_slices)
+ if(MOVIE_COARSE .and. NOISE_TOMOGRAPHY ==0) then ! only output corners !for noise tomography, must NOT be coarse
+ nmovie_points = 2 * 2 * NSPEC2D_TOP(IREGION_CRUST_MANTLE)
+ if(NGLLX /= NGLLY) &
+ call exit_MPI(myrank,'MOVIE_COARSE together with MOVIE_SURFACE requires NGLLX=NGLLY')
+ NIT = NGLLX - 1
+ else
+ nmovie_points = NGLLX * NGLLY * NSPEC2D_TOP(IREGION_CRUST_MANTLE)
+ NIT = 1
+ endif
+ allocate(store_val_x(nmovie_points), &
+ store_val_y(nmovie_points), &
+ store_val_z(nmovie_points), &
+ store_val_ux(nmovie_points), &
+ store_val_uy(nmovie_points), &
+ store_val_uz(nmovie_points),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating movie surface arrays')
- if(myrank == 0) write(IMAIN,*) 'done including central cube'
+ if (MOVIE_SURFACE) then ! those arrays are not necessary for noise tomography, so only allocate them in MOVIE_SURFACE case
+ allocate(store_val_x_all(nmovie_points,0:NPROCTOT_VAL-1), &
+ store_val_y_all(nmovie_points,0:NPROCTOT_VAL-1), &
+ store_val_z_all(nmovie_points,0:NPROCTOT_VAL-1), &
+ store_val_ux_all(nmovie_points,0:NPROCTOT_VAL-1), &
+ store_val_uy_all(nmovie_points,0:NPROCTOT_VAL-1), &
+ store_val_uz_all(nmovie_points,0:NPROCTOT_VAL-1),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating movie surface all arrays')
+ endif
+ if(myrank == 0) then
+ write(IMAIN,*)
+ write(IMAIN,*) 'Movie surface:'
+ write(IMAIN,*) ' Writing to moviedata*** files in output directory'
+ if(MOVIE_VOLUME_TYPE == 5) then
+ write(IMAIN,*) ' movie output: displacement'
+ else
+ write(IMAIN,*) ' movie output: velocity'
+ endif
+ write(IMAIN,*) ' time steps every: ',NTSTEP_BETWEEN_FRAMES
+ endif
- ! the mass matrix to assemble is a scalar, not a vector
- ndim_assemble = 1
+ end subroutine prepare_timerun_movie_surface
- ! use these buffers to assemble the inner core mass matrix with the central cube
- call assemble_MPI_central_cube_block(ichunk,nb_msgs_theor_in_cube, sender_from_slices_to_cube, &
- npoin2D_cube_from_slices, buffer_all_cube_from_slices, &
- buffer_slices, buffer_slices2, ibool_central_cube, &
- receiver_cube_from_slices, ibool_inner_core, &
- idoubling_inner_core, NSPEC_INNER_CORE, &
- ibelm_bottom_inner_core, NSPEC2D_BOTTOM(IREGION_INNER_CORE), &
- NGLOB_INNER_CORE,rmass_inner_core,ndim_assemble)
-
- ! suppress fictitious mass matrix elements in central cube
- ! because the slices do not compute all their spectral elements in the cube
- where(rmass_inner_core(:) <= 0.) rmass_inner_core = 1.
-
- end subroutine prepare_timerun_centralcube
-
!
!-------------------------------------------------------------------------------------------------
!
- subroutine prepare_timerun_constants(myrank,NSTEP, &
- DT,t0,scale_t,scale_t_inv,scale_displ,scale_veloc, &
- deltat,deltatover2,deltatsqover2, &
- b_deltat,b_deltatover2,b_deltatsqover2, &
- two_omega_earth,A_array_rotation,B_array_rotation, &
- b_two_omega_earth, SIMULATION_TYPE)
+ subroutine prepare_timerun_movie_volume()
-! precomputes constants for time integration
-
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_movie
implicit none
+
+ ! local parameters
+ integer :: ier
- include "constants.h"
- include "OUTPUT_FILES/values_from_mesher.h"
+ ! the following has to be true for the array dimensions of eps to match with those of xstore etc..
+ ! note that epsilondev and eps_trace_over_3 don't have the same dimensions.. could cause trouble
+ if (NSPEC_CRUST_MANTLE_STR_OR_ATT /= NSPEC_CRUST_MANTLE) &
+ stop 'NSPEC_CRUST_MANTLE_STRAINS_ATT /= NSPEC_CRUST_MANTLE'
+ if (NSPEC_CRUST_MANTLE_STRAIN_ONLY /= NSPEC_CRUST_MANTLE) &
+ stop 'NSPEC_CRUST_MANTLE_STRAIN_ONLY /= NSPEC_CRUST_MANTLE'
- integer myrank,NSTEP
+ write(prname,'(a,i6.6,a)') trim(LOCAL_PATH)//'/'//'proc',myrank,'_'
+ call count_points_movie_volume(prname,ibool_crust_mantle, xstore_crust_mantle,ystore_crust_mantle, &
+ zstore_crust_mantle,MOVIE_TOP,MOVIE_BOTTOM,MOVIE_WEST,MOVIE_EAST,MOVIE_NORTH,MOVIE_SOUTH, &
+ MOVIE_COARSE,npoints_3dmovie,nspecel_3dmovie,num_ibool_3dmovie,mask_ibool,mask_3dmovie)
- double precision DT
- double precision t0
+ allocate(nu_3dmovie(3,3,npoints_3dmovie),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating nu for 3d movie')
+ call write_movie_volume_mesh(npoints_3dmovie,prname,ibool_crust_mantle,xstore_crust_mantle, &
+ ystore_crust_mantle,zstore_crust_mantle, muvstore_crust_mantle_3dmovie, &
+ mask_3dmovie,mask_ibool,num_ibool_3dmovie,nu_3dmovie,MOVIE_COARSE)
- double precision scale_t,scale_t_inv,scale_displ,scale_veloc
+ if(myrank == 0) then
+ write(IMAIN,*)
+ write(IMAIN,*) 'Movie volume:'
+ write(IMAIN,*) ' Writing to movie3D*** files on local disk databases directory'
+ if(MOVIE_VOLUME_TYPE == 1) then
+ write(IMAIN,*) ' movie output: strain'
+ else if(MOVIE_VOLUME_TYPE == 2) then
+ write(IMAIN,*) ' movie output: time integral of strain'
+ else if(MOVIE_VOLUME_TYPE == 3) then
+ write(IMAIN,*) ' movie output: potency or integral of strain'
+ else if(MOVIE_VOLUME_TYPE == 4) then
+ write(IMAIN,*) ' movie output: divergence and curl'
+ else if(MOVIE_VOLUME_TYPE == 5) then
+ write(IMAIN,*) ' movie output: displacement'
+ else if(MOVIE_VOLUME_TYPE == 6) then
+ write(IMAIN,*) ' movie output: velocity'
+ endif
+ write(IMAIN,*) ' depth(T,B):',MOVIE_TOP,MOVIE_BOTTOM
+ write(IMAIN,*) ' lon(W,E) :',MOVIE_WEST,MOVIE_EAST
+ write(IMAIN,*) ' lat(S,N) :',MOVIE_SOUTH,MOVIE_NORTH
+ write(IMAIN,*) ' Starting at time step:',MOVIE_START, 'ending at:',MOVIE_STOP,'every: ',NTSTEP_BETWEEN_FRAMES
+ endif
- real(kind=CUSTOM_REAL) deltat,deltatover2,deltatsqover2
- real(kind=CUSTOM_REAL) b_deltat,b_deltatover2,b_deltatsqover2
+ if( MOVIE_VOLUME_TYPE < 1 .or. MOVIE_VOLUME_TYPE > 6) &
+ call exit_MPI(myrank, 'MOVIE_VOLUME_TYPE has to be 1,2,3,4,5 or 6')
- real(kind=CUSTOM_REAL) two_omega_earth
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ROTATION) :: &
- A_array_rotation,B_array_rotation
+ end subroutine prepare_timerun_movie_volume
- real(kind=CUSTOM_REAL) b_two_omega_earth
+!
+!-------------------------------------------------------------------------------------------------
+!
- integer SIMULATION_TYPE
+ subroutine prepare_timerun_constants()
- ! local parameters
+! precomputes constants for time integration
+ use specfem_par
+ implicit none
if(myrank == 0) then
write(IMAIN,*)
@@ -386,34 +553,13 @@
!-------------------------------------------------------------------------------------------------
!
- subroutine prepare_timerun_gravity(myrank, &
- minus_g_cmb,minus_g_icb, &
- minus_gravity_table,minus_deriv_gravity_table, &
- density_table,d_ln_density_dr_table,minus_rho_g_over_kappa_fluid, &
- ONE_CRUST,RICB,RCMB,RTOPDDOUBLEPRIME, &
- R600,R670,R220,R771,R400,R80,RMOHO,RMIDDLE_CRUST,ROCEAN)
+ subroutine prepare_timerun_gravity()
! precomputes gravity factors
+ use specfem_par
implicit none
- include "constants.h"
- include "OUTPUT_FILES/values_from_mesher.h"
-
- integer myrank
-
- real(kind=CUSTOM_REAL) minus_g_cmb,minus_g_icb
-
- ! lookup table every km for gravity
- double precision, dimension(NRAD_GRAVITY) :: minus_gravity_table, &
- minus_deriv_gravity_table,density_table, &
- d_ln_density_dr_table,minus_rho_g_over_kappa_fluid
-
- logical ONE_CRUST
-
- double precision RICB,RCMB,RTOPDDOUBLEPRIME, &
- R80,R220,R400,R600,R670,R771,RMOHO,RMIDDLE_CRUST,ROCEAN
-
! local parameters
double precision :: rspl_gravity(NR),gspl(NR),gspl2(NR)
double precision :: radius,radius_km,g,dg
@@ -495,70 +641,16 @@
!-------------------------------------------------------------------------------------------------
!
- subroutine prepare_timerun_attenuation(myrank, &
- factor_scale_crust_mantle,one_minus_sum_beta_crust_mantle,factor_common_crust_mantle, &
- factor_scale_inner_core,one_minus_sum_beta_inner_core,factor_common_inner_core, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle, &
- c22store_crust_mantle,c23store_crust_mantle, &
- c33store_crust_mantle,c44store_crust_mantle, &
- c55store_crust_mantle,c66store_crust_mantle, &
- muvstore_crust_mantle,muhstore_crust_mantle,ispec_is_tiso_crust_mantle, &
- !----- idoubling_crust_mantle, &
- muvstore_inner_core, &
- SIMULATION_TYPE,MOVIE_VOLUME,muvstore_crust_mantle_3dmovie, &
- c11store_inner_core,c12store_inner_core,c13store_inner_core, &
- c33store_inner_core,c44store_inner_core, &
- alphaval,betaval,gammaval,b_alphaval,b_betaval,b_gammaval, &
- deltat,b_deltat,LOCAL_PATH)
+ subroutine prepare_timerun_attenuation()
! precomputes attenuation factors
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_movie
implicit none
- include "constants.h"
- include "OUTPUT_FILES/values_from_mesher.h"
-
- integer myrank
-
- ! memory variables and standard linear solids for attenuation
- real(kind=CUSTOM_REAL), dimension(ATT1,ATT2,ATT3,ATT4) :: one_minus_sum_beta_crust_mantle, factor_scale_crust_mantle
- real(kind=CUSTOM_REAL), dimension(ATT1,ATT2,ATT3,ATT5) :: one_minus_sum_beta_inner_core, factor_scale_inner_core
- real(kind=CUSTOM_REAL), dimension(N_SLS,ATT1,ATT2,ATT3,ATT4) :: factor_common_crust_mantle
- real(kind=CUSTOM_REAL), dimension(N_SLS,ATT1,ATT2,ATT3,ATT5) :: factor_common_inner_core
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ANISO_MANTLE) :: &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle, &
- c22store_crust_mantle,c23store_crust_mantle, &
- c33store_crust_mantle,c44store_crust_mantle, &
- c55store_crust_mantle,c66store_crust_mantle
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ISO_MANTLE) :: &
- muvstore_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_TISO_MANTLE) :: &
- muhstore_crust_mantle
-! integer, dimension(NSPEC_CRUST_MANTLE) :: idoubling_crust_mantle
- logical, dimension(NSPEC_CRUST_MANTLE) :: ispec_is_tiso_crust_mantle
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: &
- muvstore_inner_core
-
-
- integer SIMULATION_TYPE
- logical MOVIE_VOLUME
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_OR_ATT) :: muvstore_crust_mantle_3dmovie
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ANISO_IC) :: &
- c11store_inner_core,c33store_inner_core,c12store_inner_core, &
- c13store_inner_core,c44store_inner_core
-
- real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval, betaval, gammaval
- real(kind=CUSTOM_REAL), dimension(N_SLS) :: b_alphaval, b_betaval, b_gammaval
-
- real(kind=CUSTOM_REAL) deltat,b_deltat
-
- character(len=150) LOCAL_PATH
-
! local parameters
double precision, dimension(ATT1,ATT2,ATT3,ATT4) :: omsb_crust_mantle_dble, factor_scale_crust_mantle_dble
double precision, dimension(ATT1,ATT2,ATT3,ATT5) :: omsb_inner_core_dble, factor_scale_inner_core_dble
@@ -570,18 +662,18 @@
double precision :: scale_factor,scale_factor_minus_one
real(kind=CUSTOM_REAL) :: mul
integer :: ispec,i,j,k
- character(len=150) :: prname
+ character(len=150) :: prnamel
! get and store PREM attenuation model
! CRUST_MANTLE ATTENUATION
- call create_name_database(prname, myrank, IREGION_CRUST_MANTLE, LOCAL_PATH)
- call get_attenuation_model_3D(myrank, prname, omsb_crust_mantle_dble, &
+ call create_name_database(prnamel, myrank, IREGION_CRUST_MANTLE, LOCAL_PATH)
+ call get_attenuation_model_3D(myrank, prnamel, omsb_crust_mantle_dble, &
factor_common_crust_mantle_dble,factor_scale_crust_mantle_dble,tau_sigma_dble,NSPEC_CRUST_MANTLE)
! INNER_CORE ATTENUATION
- call create_name_database(prname, myrank, IREGION_INNER_CORE, LOCAL_PATH)
- call get_attenuation_model_3D(myrank, prname, omsb_inner_core_dble, &
+ call create_name_database(prnamel, myrank, IREGION_INNER_CORE, LOCAL_PATH)
+ call get_attenuation_model_3D(myrank, prnamel, omsb_inner_core_dble, &
factor_common_inner_core_dble,factor_scale_inner_core_dble,tau_sigma_dble,NSPEC_INNER_CORE)
if(CUSTOM_REAL == SIZE_REAL) then
@@ -715,3 +807,484 @@
endif
end subroutine prepare_timerun_attenuation
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine prepare_timerun_init_wavefield()
+
+! initializes arrays
+
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
+ use specfem_par_movie
+ implicit none
+
+ ! local parameters
+ integer :: ier
+
+ ! initialize arrays to zero
+ displ_crust_mantle(:,:) = 0._CUSTOM_REAL
+ veloc_crust_mantle(:,:) = 0._CUSTOM_REAL
+ accel_crust_mantle(:,:) = 0._CUSTOM_REAL
+
+ displ_outer_core(:) = 0._CUSTOM_REAL
+ veloc_outer_core(:) = 0._CUSTOM_REAL
+ accel_outer_core(:) = 0._CUSTOM_REAL
+
+ displ_inner_core(:,:) = 0._CUSTOM_REAL
+ veloc_inner_core(:,:) = 0._CUSTOM_REAL
+ accel_inner_core(:,:) = 0._CUSTOM_REAL
+
+ ! put negligible initial value to avoid very slow underflow trapping
+ if(FIX_UNDERFLOW_PROBLEM) then
+ displ_crust_mantle(:,:) = VERYSMALLVAL
+ displ_outer_core(:) = VERYSMALLVAL
+ displ_inner_core(:,:) = VERYSMALLVAL
+ endif
+
+ ! if doing benchmark runs to measure scaling of the code,
+ ! set the initial field to 1 to make sure gradual underflow trapping does not slow down the code
+ if (DO_BENCHMARK_RUN_ONLY .and. SET_INITIAL_FIELD_TO_1_IN_BENCH) then
+ displ_crust_mantle(:,:) = 1._CUSTOM_REAL
+ veloc_crust_mantle(:,:) = 1._CUSTOM_REAL
+ accel_crust_mantle(:,:) = 1._CUSTOM_REAL
+
+ displ_outer_core(:) = 1._CUSTOM_REAL
+ veloc_outer_core(:) = 1._CUSTOM_REAL
+ accel_outer_core(:) = 1._CUSTOM_REAL
+
+ displ_inner_core(:,:) = 1._CUSTOM_REAL
+ veloc_inner_core(:,:) = 1._CUSTOM_REAL
+ accel_inner_core(:,:) = 1._CUSTOM_REAL
+ endif
+
+ if (SIMULATION_TYPE == 3) then
+ rho_kl_crust_mantle(:,:,:,:) = 0._CUSTOM_REAL
+ beta_kl_crust_mantle(:,:,:,:) = 0._CUSTOM_REAL
+ alpha_kl_crust_mantle(:,:,:,:) = 0._CUSTOM_REAL
+ if (NOISE_TOMOGRAPHY == 3) Sigma_kl_crust_mantle(:,:,:,:) = 0._CUSTOM_REAL
+
+ ! approximate hessian
+ if( APPROXIMATE_HESS_KL ) then
+ allocate( hess_kl_crust_mantle(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating hessian')
+ hess_kl_crust_mantle(:,:,:,:) = 0._CUSTOM_REAL
+ endif
+
+ ! For anisotropic kernels (in crust_mantle only)
+ cijkl_kl_crust_mantle(:,:,:,:,:) = 0._CUSTOM_REAL
+
+ rho_kl_outer_core(:,:,:,:) = 0._CUSTOM_REAL
+ alpha_kl_outer_core(:,:,:,:) = 0._CUSTOM_REAL
+
+ rho_kl_inner_core(:,:,:,:) = 0._CUSTOM_REAL
+ beta_kl_inner_core(:,:,:,:) = 0._CUSTOM_REAL
+ alpha_kl_inner_core(:,:,:,:) = 0._CUSTOM_REAL
+
+ div_displ_outer_core(:,:,:,:) = 0._CUSTOM_REAL
+ b_div_displ_outer_core(:,:,:,:) = 0._CUSTOM_REAL
+
+ ! deviatoric kernel check
+ if( deviatoric_outercore) then
+ nspec_beta_kl_outer_core = NSPEC_OUTER_CORE_ADJOINT
+ else
+ nspec_beta_kl_outer_core = 1
+ endif
+ allocate(beta_kl_outer_core(NGLLX,NGLLY,NGLLZ,nspec_beta_kl_outer_core),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating beta outercore')
+ beta_kl_outer_core(:,:,:,:) = 0._CUSTOM_REAL
+ endif
+
+  ! initialize to be on the safe side for adjoint runs SIMULATION_TYPE==2
+ eps_trace_over_3_crust_mantle(:,:,:,:) = 0._CUSTOM_REAL
+
+ epsilondev_xx_crust_mantle(:,:,:,:) = 0._CUSTOM_REAL
+ epsilondev_yy_crust_mantle(:,:,:,:) = 0._CUSTOM_REAL
+ epsilondev_xy_crust_mantle(:,:,:,:) = 0._CUSTOM_REAL
+ epsilondev_xz_crust_mantle(:,:,:,:) = 0._CUSTOM_REAL
+ epsilondev_yz_crust_mantle(:,:,:,:) = 0._CUSTOM_REAL
+
+ eps_trace_over_3_inner_core(:,:,:,:) = 0._CUSTOM_REAL
+
+ epsilondev_xx_inner_core(:,:,:,:) = 0._CUSTOM_REAL
+ epsilondev_yy_inner_core(:,:,:,:) = 0._CUSTOM_REAL
+ epsilondev_xy_inner_core(:,:,:,:) = 0._CUSTOM_REAL
+ epsilondev_xz_inner_core(:,:,:,:) = 0._CUSTOM_REAL
+ epsilondev_yz_inner_core(:,:,:,:) = 0._CUSTOM_REAL
+
+ if(FIX_UNDERFLOW_PROBLEM) then
+ eps_trace_over_3_crust_mantle(:,:,:,:) = VERYSMALLVAL
+
+ epsilondev_xx_crust_mantle(:,:,:,:) = VERYSMALLVAL
+ epsilondev_yy_crust_mantle(:,:,:,:) = VERYSMALLVAL
+ epsilondev_xy_crust_mantle(:,:,:,:) = VERYSMALLVAL
+ epsilondev_xz_crust_mantle(:,:,:,:) = VERYSMALLVAL
+ epsilondev_yz_crust_mantle(:,:,:,:) = VERYSMALLVAL
+
+ eps_trace_over_3_inner_core(:,:,:,:) = VERYSMALLVAL
+
+ epsilondev_xx_inner_core(:,:,:,:) = VERYSMALLVAL
+ epsilondev_yy_inner_core(:,:,:,:) = VERYSMALLVAL
+ epsilondev_xy_inner_core(:,:,:,:) = VERYSMALLVAL
+ epsilondev_xz_inner_core(:,:,:,:) = VERYSMALLVAL
+ epsilondev_yz_inner_core(:,:,:,:) = VERYSMALLVAL
+
+ endif
+
+ if (COMPUTE_AND_STORE_STRAIN) then
+ if(MOVIE_VOLUME .and. (MOVIE_VOLUME_TYPE == 2 .or. MOVIE_VOLUME_TYPE == 3)) then
+ Iepsilondev_crust_mantle(:,:,:,:,:) = 0._CUSTOM_REAL
+ Ieps_trace_over_3_crust_mantle(:,:,:,:)=0._CUSTOM_REAL
+ endif
+ endif
+
+ ! clear memory variables if attenuation
+ if(ATTENUATION_VAL) then
+ R_xx_crust_mantle(:,:,:,:,:) = 0._CUSTOM_REAL
+ R_yy_crust_mantle(:,:,:,:,:) = 0._CUSTOM_REAL
+ R_xy_crust_mantle(:,:,:,:,:) = 0._CUSTOM_REAL
+ R_xz_crust_mantle(:,:,:,:,:) = 0._CUSTOM_REAL
+ R_yz_crust_mantle(:,:,:,:,:) = 0._CUSTOM_REAL
+
+ R_xx_inner_core(:,:,:,:,:) = 0._CUSTOM_REAL
+ R_yy_inner_core(:,:,:,:,:) = 0._CUSTOM_REAL
+ R_xy_inner_core(:,:,:,:,:) = 0._CUSTOM_REAL
+ R_xz_inner_core(:,:,:,:,:) = 0._CUSTOM_REAL
+ R_yz_inner_core(:,:,:,:,:) = 0._CUSTOM_REAL
+
+ if(FIX_UNDERFLOW_PROBLEM) then
+ R_xx_crust_mantle(:,:,:,:,:) = VERYSMALLVAL
+ R_yy_crust_mantle(:,:,:,:,:) = VERYSMALLVAL
+ R_xy_crust_mantle(:,:,:,:,:) = VERYSMALLVAL
+ R_xz_crust_mantle(:,:,:,:,:) = VERYSMALLVAL
+ R_yz_crust_mantle(:,:,:,:,:) = VERYSMALLVAL
+
+ R_xx_inner_core(:,:,:,:,:) = VERYSMALLVAL
+ R_yy_inner_core(:,:,:,:,:) = VERYSMALLVAL
+ R_xy_inner_core(:,:,:,:,:) = VERYSMALLVAL
+ R_xz_inner_core(:,:,:,:,:) = VERYSMALLVAL
+ R_yz_inner_core(:,:,:,:,:) = VERYSMALLVAL
+ endif
+ endif
+
+ end subroutine prepare_timerun_init_wavefield
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine prepare_timerun_noise()
+
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_movie
+ implicit none
+ ! local parameters
+ integer :: ier
+
+ ! NOISE TOMOGRAPHY
+ if ( NOISE_TOMOGRAPHY /= 0 ) then
+ allocate(noise_sourcearray(NDIM,NGLLX,NGLLY,NGLLZ,NSTEP), &
+ normal_x_noise(nmovie_points), &
+ normal_y_noise(nmovie_points), &
+ normal_z_noise(nmovie_points), &
+ mask_noise(nmovie_points), &
+ noise_surface_movie(NDIM,NGLLX,NGLLY,NSPEC2D_TOP(IREGION_CRUST_MANTLE)),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating noise arrays')
+
+ noise_sourcearray(:,:,:,:,:) = 0._CUSTOM_REAL
+ normal_x_noise(:) = 0._CUSTOM_REAL
+ normal_y_noise(:) = 0._CUSTOM_REAL
+ normal_z_noise(:) = 0._CUSTOM_REAL
+ mask_noise(:) = 0._CUSTOM_REAL
+ noise_surface_movie(:,:,:,:) = 0._CUSTOM_REAL
+
+ call read_parameters_noise(myrank,nrec,NSTEP,nmovie_points, &
+ islice_selected_rec,xi_receiver,eta_receiver,gamma_receiver,nu, &
+ noise_sourcearray,xigll,yigll,zigll,NSPEC2D_TOP(IREGION_CRUST_MANTLE), &
+ NIT, ibool_crust_mantle, ibelm_top_crust_mantle, &
+ xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
+ irec_master_noise,normal_x_noise,normal_y_noise,normal_z_noise,mask_noise)
+
+ call check_parameters_noise(myrank,NOISE_TOMOGRAPHY,SIMULATION_TYPE,SAVE_FORWARD, &
+ NUMBER_OF_RUNS, NUMBER_OF_THIS_RUN,ROTATE_SEISMOGRAMS_RT, &
+ SAVE_ALL_SEISMOS_IN_ONE_FILE, USE_BINARY_FOR_LARGE_FILE, &
+ MOVIE_COARSE,LOCAL_PATH,NSPEC2D_TOP(IREGION_CRUST_MANTLE),NSTEP)
+ endif
+
+ end subroutine prepare_timerun_noise
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine prepare_timerun_GPU()
+
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
+ use specfem_par_movie
+ implicit none
+ include 'mpif.h'
+
+ ! local parameters
+ integer :: ier
+ real :: free_mb,used_mb,total_mb
+ integer :: ncuda_devices,ncuda_devices_min,ncuda_devices_max
+ ! dummy custom_real variables to convert from double precision
+ real(kind=CUSTOM_REAL),dimension(:,:,:),allocatable:: cr_wgll_cube
+ real(kind=CUSTOM_REAL),dimension(:),allocatable:: &
+ cr_d_ln_density_dr_table,cr_minus_rho_g_over_kappa_fluid, &
+ cr_minus_gravity_table,cr_minus_deriv_gravity_table, &
+ cr_density_table
+
+ ! GPU_MODE now defined in Par_file
+ if(myrank == 0 ) then
+ write(IMAIN,*)
+ write(IMAIN,*) "GPU_MODE Active. Preparing Fields and Constants on Device."
+ write(IMAIN,*)
+ endif
+
+ ! initializes GPU and outputs info to files for all processes
+ call prepare_cuda_device(myrank,ncuda_devices)
+
+ ! collects min/max of local devices found for statistics
+ call MPI_REDUCE(ncuda_devices,ncuda_devices_min,1,MPI_INTEGER,MPI_MIN,0,MPI_COMM_WORLD,ier)
+ call MPI_REDUCE(ncuda_devices,ncuda_devices_max,1,MPI_INTEGER,MPI_MAX,0,MPI_COMM_WORLD,ier)
+
+ ! prepares general fields on GPU
+ call prepare_constants_device(Mesh_pointer,NGLLX, &
+ hprime_xx, hprime_yy, hprime_zz, &
+ hprimewgll_xx, hprimewgll_yy, hprimewgll_zz, &
+ wgllwgll_xy, wgllwgll_xz, wgllwgll_yz, &
+ NSOURCES, nsources_local, &
+ sourcearrays, islice_selected_source, ispec_selected_source, &
+ number_receiver_global, ispec_selected_rec, &
+ nrec, nrec_local, &
+ NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE, &
+ NSPEC_OUTER_CORE,NGLOB_OUTER_CORE, &
+ NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
+ SIMULATION_TYPE, &
+ SAVE_FORWARD, &
+ ABSORBING_CONDITIONS, &
+ GRAVITY_VAL,ROTATION_VAL, &
+ ATTENUATION_VAL,USE_ATTENUATION_MIMIC, &
+ COMPUTE_AND_STORE_STRAIN, &
+ ANISOTROPIC_3D_MANTLE_VAL, &
+ ANISOTROPIC_INNER_CORE_VAL, &
+ SAVE_BOUNDARY_MESH, &
+ USE_MESH_COLORING_GPU)
+ call sync_all()
+
+ ! prepares rotation arrays
+ if( ROTATION_VAL ) then
+ if(myrank == 0 ) write(IMAIN,*) " loading rotation arrays"
+
+ call prepare_fields_rotation_device(Mesh_pointer, &
+ two_omega_earth,deltat, &
+ A_array_rotation,B_array_rotation, &
+ b_two_omega_earth,b_deltat, &
+ b_A_array_rotation,b_B_array_rotation, &
+ NSPEC_OUTER_CORE_ROTATION)
+ endif
+ call sync_all()
+
+ ! prepares arrays related to gravity
+ ! note: GPU will use only single-precision (or double precision) for all calculations
+ ! we convert to wgll_cube to custom real (by default single-precision),
+ ! using implicit conversion
+ if(myrank == 0 ) write(IMAIN,*) " loading non-gravity/gravity arrays"
+
+ allocate(cr_d_ln_density_dr_table(NRAD_GRAVITY), &
+ cr_minus_rho_g_over_kappa_fluid(NRAD_GRAVITY), &
+ cr_minus_gravity_table(NRAD_GRAVITY), &
+ cr_minus_deriv_gravity_table(NRAD_GRAVITY), &
+ cr_density_table(NRAD_GRAVITY), &
+ stat=ier)
+ if( ier /= 0 ) stop 'error allocating cr_minus_rho_g_over_kappa_fluid, etc...'
+ ! d_ln_density_dr_table needed for no gravity case
+ cr_d_ln_density_dr_table(:) = d_ln_density_dr_table(:)
+ ! these are needed for gravity cases only
+ cr_minus_rho_g_over_kappa_fluid(:) = minus_rho_g_over_kappa_fluid(:)
+ cr_minus_gravity_table(:) = minus_gravity_table(:)
+ cr_minus_deriv_gravity_table(:) = minus_deriv_gravity_table(:)
+ cr_density_table(:) = density_table(:)
+
+ allocate(cr_wgll_cube(NGLLX,NGLLY,NGLLZ),stat=ier)
+ if( ier /= 0 ) stop 'error allocating cr_wgll_cube'
+ cr_wgll_cube(:,:,:) = wgll_cube(:,:,:)
+
+ ! prepares on GPU
+ call prepare_fields_gravity_device(Mesh_pointer, &
+ cr_d_ln_density_dr_table, &
+ cr_minus_rho_g_over_kappa_fluid, &
+ cr_minus_gravity_table, &
+ cr_minus_deriv_gravity_table, &
+ cr_density_table, &
+ cr_wgll_cube, &
+ NRAD_GRAVITY)
+ deallocate(cr_d_ln_density_dr_table,cr_minus_rho_g_over_kappa_fluid, &
+ cr_minus_gravity_table,cr_minus_deriv_gravity_table, &
+ cr_density_table)
+ deallocate(cr_wgll_cube)
+ call sync_all()
+
+ ! prepares attenuation arrays
+ if( ATTENUATION_VAL ) then
+ if(myrank == 0 ) write(IMAIN,*) " loading attenuation"
+
+ call prepare_fields_attenuat_device(Mesh_pointer, &
+ R_xx_crust_mantle,R_yy_crust_mantle,R_xy_crust_mantle, &
+ R_xz_crust_mantle,R_yz_crust_mantle, &
+ factor_common_crust_mantle, &
+ one_minus_sum_beta_crust_mantle, &
+ R_xx_inner_core,R_yy_inner_core,R_xy_inner_core, &
+ R_xz_inner_core,R_yz_inner_core, &
+ factor_common_inner_core, &
+ one_minus_sum_beta_inner_core, &
+ alphaval,betaval,gammaval, &
+ b_alphaval,b_betaval,b_gammaval)
+ endif
+ call sync_all()
+
+
+  ! prepares strain arrays
+ if( COMPUTE_AND_STORE_STRAIN ) then
+ if(myrank == 0 ) write(IMAIN,*) " loading strain"
+
+ call prepare_fields_strain_device(Mesh_pointer, &
+ epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
+ epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle, &
+ b_epsilondev_xx_crust_mantle,b_epsilondev_yy_crust_mantle,b_epsilondev_xy_crust_mantle, &
+ b_epsilondev_xz_crust_mantle,b_epsilondev_yz_crust_mantle, &
+ eps_trace_over_3_crust_mantle, &
+ b_eps_trace_over_3_crust_mantle, &
+ epsilondev_xx_inner_core,epsilondev_yy_inner_core,epsilondev_xy_inner_core, &
+ epsilondev_xz_inner_core,epsilondev_yz_inner_core, &
+ b_epsilondev_xx_inner_core,b_epsilondev_yy_inner_core,b_epsilondev_xy_inner_core, &
+ b_epsilondev_xz_inner_core,b_epsilondev_yz_inner_core, &
+ eps_trace_over_3_inner_core, &
+ b_eps_trace_over_3_inner_core)
+ endif
+ call sync_all()
+
+
+ ! prepares MPI interfaces
+ if(myrank == 0 ) write(IMAIN,*) " loading mpi interfaces"
+
+ call prepare_mpi_buffers_device(Mesh_pointer, &
+ num_interfaces_crust_mantle,max_nibool_interfaces_crust_mantle, &
+ nibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle, &
+ num_interfaces_inner_core,max_nibool_interfaces_inner_core, &
+ nibool_interfaces_inner_core,ibool_interfaces_inner_core, &
+ num_interfaces_outer_core,max_nibool_interfaces_outer_core, &
+ nibool_interfaces_outer_core,ibool_interfaces_outer_core)
+
+ ! crust/mantle region
+ if(myrank == 0 ) write(IMAIN,*) " loading crust/mantle region"
+ call prepare_crust_mantle_device(Mesh_pointer, &
+ xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+ etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
+ gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
+ rhostore_crust_mantle, &
+ kappavstore_crust_mantle,muvstore_crust_mantle, &
+ kappahstore_crust_mantle,muhstore_crust_mantle, &
+ eta_anisostore_crust_mantle, &
+ rmass_crust_mantle, &
+ ibool_crust_mantle, &
+ xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
+ ispec_is_tiso_crust_mantle, &
+ c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle, &
+ c14store_crust_mantle,c15store_crust_mantle,c16store_crust_mantle, &
+ c22store_crust_mantle,c23store_crust_mantle,c24store_crust_mantle, &
+ c25store_crust_mantle,c26store_crust_mantle,c33store_crust_mantle, &
+ c34store_crust_mantle,c35store_crust_mantle,c36store_crust_mantle, &
+ c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
+ c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
+ num_phase_ispec_crust_mantle,phase_ispec_inner_crust_mantle, &
+ nspec_outer_crust_mantle,nspec_inner_crust_mantle)
+ call sync_all()
+
+
+ ! outer core region
+ if(myrank == 0 ) write(IMAIN,*) " loading outer core region"
+ call prepare_outer_core_device(Mesh_pointer, &
+ xix_outer_core,xiy_outer_core,xiz_outer_core, &
+ etax_outer_core,etay_outer_core,etaz_outer_core, &
+ gammax_outer_core,gammay_outer_core,gammaz_outer_core, &
+ rhostore_outer_core,kappavstore_outer_core, &
+ rmass_outer_core, &
+ ibool_outer_core, &
+ xstore_outer_core,ystore_outer_core,zstore_outer_core, &
+ num_phase_ispec_outer_core,phase_ispec_inner_outer_core, &
+ nspec_outer_outer_core,nspec_inner_outer_core)
+ call sync_all()
+
+
+ ! inner core region
+ if(myrank == 0 ) write(IMAIN,*) " loading inner core region"
+ call prepare_inner_core_device(Mesh_pointer, &
+ xix_inner_core,xiy_inner_core,xiz_inner_core, &
+ etax_inner_core,etay_inner_core,etaz_inner_core, &
+ gammax_inner_core,gammay_inner_core,gammaz_inner_core, &
+ rhostore_inner_core,kappavstore_inner_core,muvstore_inner_core, &
+ rmass_inner_core, &
+ ibool_inner_core, &
+ xstore_inner_core,ystore_inner_core,zstore_inner_core, &
+ c11store_inner_core,c12store_inner_core,c13store_inner_core, &
+ c33store_inner_core,c44store_inner_core, &
+ idoubling_inner_core, &
+ num_phase_ispec_inner_core,phase_ispec_inner_inner_core, &
+ nspec_outer_inner_core,nspec_inner_inner_core)
+ call sync_all()
+
+
+ ! transfer forward and backward fields to device with initial values
+ if(myrank == 0 ) write(IMAIN,*) " transfering initial wavefield"
+ call transfer_fields_cm_to_device(NDIM*NGLOB_CRUST_MANTLE,displ_crust_mantle,veloc_crust_mantle,accel_crust_mantle, &
+ Mesh_pointer)
+
+ call transfer_fields_ic_to_device(NDIM*NGLOB_INNER_CORE,displ_inner_core,veloc_inner_core,accel_inner_core, &
+ Mesh_pointer)
+
+ call transfer_fields_oc_to_device(NGLOB_OUTER_CORE,displ_outer_core,veloc_outer_core,accel_outer_core, &
+ Mesh_pointer)
+
+ if(SIMULATION_TYPE == 3) then
+ call transfer_b_fields_cm_to_device(NDIM*NGLOB_CRUST_MANTLE, &
+ b_displ_crust_mantle,b_veloc_crust_mantle,b_accel_crust_mantle, &
+ Mesh_pointer)
+
+ call transfer_b_fields_ic_to_device(NDIM*NGLOB_INNER_CORE, &
+ b_displ_inner_core,b_veloc_inner_core,b_accel_inner_core, &
+ Mesh_pointer)
+
+ call transfer_b_fields_oc_to_device(NGLOB_OUTER_CORE, &
+ b_displ_outer_core,b_veloc_outer_core,b_accel_outer_core, &
+ Mesh_pointer)
+ endif
+
+
+ ! outputs GPU usage to files for all processes
+ call output_free_device_memory(myrank)
+
+ ! outputs usage for main process
+ if( myrank == 0 ) then
+ write(IMAIN,*)" GPU number of devices per node: min =",ncuda_devices_min
+ write(IMAIN,*)" max =",ncuda_devices_max
+ write(IMAIN,*)
+
+ call get_free_device_memory(free_mb,used_mb,total_mb)
+ write(IMAIN,*)" GPU usage: free =",free_mb," MB",nint(free_mb/total_mb*100.0),"%"
+ write(IMAIN,*)" used =",used_mb," MB",nint(used_mb/total_mb*100.0),"%"
+ write(IMAIN,*)" total =",total_mb," MB",nint(total_mb/total_mb*100.0),"%"
+ write(IMAIN,*)
+ endif
+
+ end subroutine prepare_timerun_GPU
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_arrays_buffers_solver.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_arrays_buffers_solver.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_arrays_buffers_solver.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -26,13 +26,13 @@
!=====================================================================
subroutine read_arrays_buffers_solver(iregion_code,myrank, &
- iboolleft_xi,iboolright_xi,iboolleft_eta,iboolright_eta, &
- npoin2D_xi,npoin2D_eta, &
- iprocfrom_faces,iprocto_faces,imsg_type, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- iboolfaces,npoin2D_faces,iboolcorner, &
- NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX,NGLOB2DMAX_XY,NGLOB1D_RADIAL, &
- NUMMSGS_FACES,NCORNERSCHUNKS,NPROCTOT,NPROC_XI,NPROC_ETA,LOCAL_PATH,NCHUNKS)
+ iboolleft_xi,iboolright_xi,iboolleft_eta,iboolright_eta, &
+ npoin2D_xi,npoin2D_eta, &
+ iprocfrom_faces,iprocto_faces,imsg_type, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ iboolfaces,npoin2D_faces,iboolcorner, &
+ NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX,NGLOB2DMAX_XY,NGLOB1D_RADIAL, &
+ NUMMSGS_FACES,NCORNERSCHUNKS,NPROCTOT,NPROC_XI,NPROC_ETA,LOCAL_PATH,NCHUNKS)
implicit none
@@ -117,10 +117,9 @@
close(IIN)
if(myrank == 0) then
- write(IMAIN,*)
- write(IMAIN,*) '# max of points in MPI buffers along xi npoin2D_xi = ', &
+ write(IMAIN,*) ' #max of points in MPI buffers along xi npoin2D_xi = ', &
maxval(npoin2D_xi(:))
- write(IMAIN,*) '# max of array elements transferred npoin2D_xi*NDIM = ', &
+ write(IMAIN,*) ' #max of array elements transferred npoin2D_xi*NDIM = ', &
maxval(npoin2D_xi(:))*NDIM
write(IMAIN,*)
endif
@@ -164,10 +163,9 @@
close(IIN)
if(myrank == 0) then
- write(IMAIN,*)
- write(IMAIN,*) '#max of points in MPI buffers along eta npoin2D_eta = ', &
+ write(IMAIN,*) ' #max of points in MPI buffers along eta npoin2D_eta = ', &
maxval(npoin2D_eta(:))
- write(IMAIN,*) '#max of array elements transferred npoin2D_eta*NDIM = ', &
+ write(IMAIN,*) ' #max of array elements transferred npoin2D_eta*NDIM = ', &
maxval(npoin2D_eta(:))*NDIM
write(IMAIN,*)
endif
@@ -182,34 +180,34 @@
if(myrank == 0) then
-! file with the list of processors for each message for faces
- open(unit=IIN,file=trim(OUTPUT_FILES)//'/list_messages_faces.txt',status='old',action='read')
- do imsg = 1,NUMMSGS_FACES
- read(IIN,*) imsg_type(imsg),iprocfrom_faces(imsg),iprocto_faces(imsg)
- if (iprocfrom_faces(imsg) < 0 &
- .or. iprocto_faces(imsg) < 0 &
- .or. iprocfrom_faces(imsg) > NPROCTOT-1 &
- .or. iprocto_faces(imsg) > NPROCTOT-1) &
- call exit_MPI(myrank,'incorrect chunk faces numbering')
- if (imsg_type(imsg) < 1 .or. imsg_type(imsg) > 3) &
- call exit_MPI(myrank,'incorrect message type labeling')
- enddo
- close(IIN)
+ ! file with the list of processors for each message for faces
+ open(unit=IIN,file=trim(OUTPUT_FILES)//'/list_messages_faces.txt',status='old',action='read')
+ do imsg = 1,NUMMSGS_FACES
+ read(IIN,*) imsg_type(imsg),iprocfrom_faces(imsg),iprocto_faces(imsg)
+ if (iprocfrom_faces(imsg) < 0 &
+ .or. iprocto_faces(imsg) < 0 &
+ .or. iprocfrom_faces(imsg) > NPROCTOT-1 &
+ .or. iprocto_faces(imsg) > NPROCTOT-1) &
+ call exit_MPI(myrank,'incorrect chunk faces numbering')
+ if (imsg_type(imsg) < 1 .or. imsg_type(imsg) > 3) &
+ call exit_MPI(myrank,'incorrect message type labeling')
+ enddo
+ close(IIN)
-! file with the list of processors for each message for corners
- open(unit=IIN,file=trim(OUTPUT_FILES)//'/list_messages_corners.txt',status='old',action='read')
- do imsg = 1,NCORNERSCHUNKS
- read(IIN,*) iproc_master_corners(imsg),iproc_worker1_corners(imsg), &
- iproc_worker2_corners(imsg)
- if (iproc_master_corners(imsg) < 0 &
- .or. iproc_worker1_corners(imsg) < 0 &
- .or. iproc_worker2_corners(imsg) < 0 &
- .or. iproc_master_corners(imsg) > NPROCTOT-1 &
- .or. iproc_worker1_corners(imsg) > NPROCTOT-1 &
- .or. iproc_worker2_corners(imsg) > NPROCTOT-1) &
- call exit_MPI(myrank,'incorrect chunk corner numbering')
- enddo
- close(IIN)
+ ! file with the list of processors for each message for corners
+ open(unit=IIN,file=trim(OUTPUT_FILES)//'/list_messages_corners.txt',status='old',action='read')
+ do imsg = 1,NCORNERSCHUNKS
+ read(IIN,*) iproc_master_corners(imsg),iproc_worker1_corners(imsg), &
+ iproc_worker2_corners(imsg)
+ if (iproc_master_corners(imsg) < 0 &
+ .or. iproc_worker1_corners(imsg) < 0 &
+ .or. iproc_worker2_corners(imsg) < 0 &
+ .or. iproc_master_corners(imsg) > NPROCTOT-1 &
+ .or. iproc_worker1_corners(imsg) > NPROCTOT-1 &
+ .or. iproc_worker2_corners(imsg) > NPROCTOT-1) &
+ call exit_MPI(myrank,'incorrect chunk corner numbering')
+ enddo
+ close(IIN)
endif
@@ -254,33 +252,33 @@
!---- a given slice can belong to at most one corner
icount_corners = 0
do imsg = 1,NCORNERSCHUNKS
- if(myrank == iproc_master_corners(imsg) .or. &
- myrank == iproc_worker1_corners(imsg) .or. &
- myrank == iproc_worker2_corners(imsg)) then
- icount_corners = icount_corners + 1
- if(icount_corners>1 .and. (NPROC_XI > 1 .or. NPROC_ETA > 1)) &
- call exit_MPI(myrank,'more than one corner for this slice')
- if(icount_corners>4) call exit_MPI(myrank,'more than four corners for this slice')
+ if(myrank == iproc_master_corners(imsg) .or. &
+ myrank == iproc_worker1_corners(imsg) .or. &
+ myrank == iproc_worker2_corners(imsg)) then
+ icount_corners = icount_corners + 1
+ if(icount_corners>1 .and. (NPROC_XI > 1 .or. NPROC_ETA > 1)) &
+ call exit_MPI(myrank,'more than one corner for this slice')
+ if(icount_corners>4) call exit_MPI(myrank,'more than four corners for this slice')
-! read file with 1D buffer for corner
- if(myrank == iproc_master_corners(imsg)) then
- write(filename,"('buffer_corners_chunks_master_msg',i6.6,'.txt')") imsg
- else if(myrank == iproc_worker1_corners(imsg)) then
- write(filename,"('buffer_corners_chunks_worker1_msg',i6.6,'.txt')") imsg
- else if(myrank == iproc_worker2_corners(imsg)) then
- write(filename,"('buffer_corners_chunks_worker2_msg',i6.6,'.txt')") imsg
- endif
+ ! read file with 1D buffer for corner
+ if(myrank == iproc_master_corners(imsg)) then
+ write(filename,"('buffer_corners_chunks_master_msg',i6.6,'.txt')") imsg
+ else if(myrank == iproc_worker1_corners(imsg)) then
+ write(filename,"('buffer_corners_chunks_worker1_msg',i6.6,'.txt')") imsg
+ else if(myrank == iproc_worker2_corners(imsg)) then
+ write(filename,"('buffer_corners_chunks_worker2_msg',i6.6,'.txt')") imsg
+ endif
-! matching codes
- open(unit=IIN,file=prname(1:len_trim(prname))//filename,status='old',action='read')
- read(IIN,*) npoin1D_corner
- if(npoin1D_corner /= NGLOB1D_RADIAL) &
- call exit_MPI(myrank,'incorrect nb of points in corner buffer')
- do ipoin1D = 1,npoin1D_corner
- read(IIN,*) iboolcorner(ipoin1D,icount_corners),xdummy,ydummy,zdummy
- enddo
- close(IIN)
- endif
+ ! matching codes
+ open(unit=IIN,file=prname(1:len_trim(prname))//filename,status='old',action='read')
+ read(IIN,*) npoin1D_corner
+ if(npoin1D_corner /= NGLOB1D_RADIAL) &
+ call exit_MPI(myrank,'incorrect nb of points in corner buffer')
+ do ipoin1D = 1,npoin1D_corner
+ read(IIN,*) iboolcorner(ipoin1D,icount_corners),xdummy,ydummy,zdummy
+ enddo
+ close(IIN)
+ endif
enddo
endif
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_forward_arrays.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_forward_arrays.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_forward_arrays.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -25,77 +25,17 @@
!
!=====================================================================
- subroutine read_forward_arrays_startrun(myrank,NSTEP, &
- SIMULATION_TYPE,NUMBER_OF_RUNS,NUMBER_OF_THIS_RUN, &
- it_begin,it_end, &
- displ_crust_mantle,veloc_crust_mantle,accel_crust_mantle, &
- displ_inner_core,veloc_inner_core,accel_inner_core, &
- displ_outer_core,veloc_outer_core,accel_outer_core, &
- R_memory_crust_mantle,R_memory_inner_core, &
- epsilondev_crust_mantle,epsilondev_inner_core, &
- A_array_rotation,B_array_rotation, &
- b_displ_crust_mantle,b_veloc_crust_mantle,b_accel_crust_mantle, &
- b_displ_inner_core,b_veloc_inner_core,b_accel_inner_core, &
- b_displ_outer_core,b_veloc_outer_core,b_accel_outer_core, &
- b_R_memory_crust_mantle,b_R_memory_inner_core, &
- b_epsilondev_crust_mantle,b_epsilondev_inner_core, &
- b_A_array_rotation,b_B_array_rotation,LOCAL_PATH)
+ subroutine read_forward_arrays_startrun()
! reads in saved wavefields
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
implicit none
- include "constants.h"
- include "OUTPUT_FILES/values_from_mesher.h"
-
- integer myrank,NSTEP
-
- integer SIMULATION_TYPE
-
- integer NUMBER_OF_RUNS,NUMBER_OF_THIS_RUN,it_begin,it_end
-
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE) :: &
- displ_crust_mantle,veloc_crust_mantle,accel_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE) :: &
- displ_inner_core,veloc_inner_core,accel_inner_core
- real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE) :: &
- displ_outer_core,veloc_outer_core,accel_outer_core
-
- real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUAT) :: &
- R_memory_crust_mantle
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_OR_ATT) :: &
- epsilondev_crust_mantle
- real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ATTENUATION) :: &
- R_memory_inner_core
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_OR_ATT) :: &
- epsilondev_inner_core
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ROTATION) :: &
- A_array_rotation,B_array_rotation
-
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE_ADJOINT) :: &
- b_displ_crust_mantle,b_veloc_crust_mantle,b_accel_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE_ADJOINT) :: &
- b_displ_outer_core,b_veloc_outer_core,b_accel_outer_core
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE_ADJOINT) :: &
- b_displ_inner_core,b_veloc_inner_core,b_accel_inner_core
-
- real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_AND_ATT) :: &
- b_R_memory_crust_mantle
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
- b_epsilondev_crust_mantle
-
- real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_AND_ATT) :: &
- b_R_memory_inner_core
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ADJOINT) :: &
- b_epsilondev_inner_core
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ROT_ADJOINT) :: &
- b_A_array_rotation,b_B_array_rotation
-
- character(len=150) LOCAL_PATH
-
- !local parameters
+ ! local parameters
character(len=150) outputname
! define correct time steps if restart files
@@ -133,6 +73,7 @@
if(NUMBER_OF_THIS_RUN > 1) then
write(outputname,"('dump_all_arrays',i6.6)") myrank
open(unit=55,file=trim(LOCAL_PATH)//'/'//outputname,status='old',action='read',form='unformatted')
+
read(55) displ_crust_mantle
read(55) veloc_crust_mantle
read(55) accel_crust_mantle
@@ -142,12 +83,34 @@
read(55) displ_outer_core
read(55) veloc_outer_core
read(55) accel_outer_core
- read(55) epsilondev_crust_mantle
- read(55) epsilondev_inner_core
+
+ read(55) epsilondev_xx_crust_mantle
+ read(55) epsilondev_yy_crust_mantle
+ read(55) epsilondev_xy_crust_mantle
+ read(55) epsilondev_xz_crust_mantle
+ read(55) epsilondev_yz_crust_mantle
+
+ read(55) epsilondev_xx_inner_core
+ read(55) epsilondev_yy_inner_core
+ read(55) epsilondev_xy_inner_core
+ read(55) epsilondev_xz_inner_core
+ read(55) epsilondev_yz_inner_core
+
read(55) A_array_rotation
read(55) B_array_rotation
- read(55) R_memory_crust_mantle
- read(55) R_memory_inner_core
+
+ read(55) R_xx_crust_mantle
+ read(55) R_yy_crust_mantle
+ read(55) R_xy_crust_mantle
+ read(55) R_xz_crust_mantle
+ read(55) R_yz_crust_mantle
+
+ read(55) R_xx_inner_core
+ read(55) R_yy_inner_core
+ read(55) R_xy_inner_core
+ read(55) R_xz_inner_core
+ read(55) R_yz_inner_core
+
close(55)
endif
@@ -162,15 +125,36 @@
b_displ_outer_core = 0._CUSTOM_REAL
b_veloc_outer_core = 0._CUSTOM_REAL
b_accel_outer_core = 0._CUSTOM_REAL
- b_epsilondev_crust_mantle = 0._CUSTOM_REAL
- b_epsilondev_inner_core = 0._CUSTOM_REAL
+
+ b_epsilondev_xx_crust_mantle = 0._CUSTOM_REAL
+ b_epsilondev_yy_crust_mantle = 0._CUSTOM_REAL
+ b_epsilondev_xy_crust_mantle = 0._CUSTOM_REAL
+ b_epsilondev_xz_crust_mantle = 0._CUSTOM_REAL
+ b_epsilondev_yz_crust_mantle = 0._CUSTOM_REAL
+
+ b_epsilondev_xx_inner_core = 0._CUSTOM_REAL
+ b_epsilondev_yy_inner_core = 0._CUSTOM_REAL
+ b_epsilondev_xy_inner_core = 0._CUSTOM_REAL
+ b_epsilondev_xz_inner_core = 0._CUSTOM_REAL
+ b_epsilondev_yz_inner_core = 0._CUSTOM_REAL
+
if (ROTATION_VAL) then
b_A_array_rotation = 0._CUSTOM_REAL
b_B_array_rotation = 0._CUSTOM_REAL
endif
if (ATTENUATION_VAL) then
- b_R_memory_crust_mantle = 0._CUSTOM_REAL
- b_R_memory_inner_core = 0._CUSTOM_REAL
+ b_R_xx_crust_mantle = 0._CUSTOM_REAL
+ b_R_yy_crust_mantle = 0._CUSTOM_REAL
+ b_R_xy_crust_mantle = 0._CUSTOM_REAL
+ b_R_xz_crust_mantle = 0._CUSTOM_REAL
+ b_R_yz_crust_mantle = 0._CUSTOM_REAL
+
+ b_R_xx_inner_core = 0._CUSTOM_REAL
+ b_R_yy_inner_core = 0._CUSTOM_REAL
+ b_R_xy_inner_core = 0._CUSTOM_REAL
+ b_R_xz_inner_core = 0._CUSTOM_REAL
+ b_R_yz_inner_core = 0._CUSTOM_REAL
+
endif
endif
@@ -180,51 +164,22 @@
!-------------------------------------------------------------------------------------------------
!
- subroutine read_forward_arrays(myrank, &
- b_displ_crust_mantle,b_veloc_crust_mantle,b_accel_crust_mantle, &
- b_displ_inner_core,b_veloc_inner_core,b_accel_inner_core, &
- b_displ_outer_core,b_veloc_outer_core,b_accel_outer_core, &
- b_R_memory_crust_mantle,b_R_memory_inner_core, &
- b_epsilondev_crust_mantle,b_epsilondev_inner_core, &
- b_A_array_rotation,b_B_array_rotation,LOCAL_PATH)
+ subroutine read_forward_arrays()
-! reads in saved wavefields
+! reads in saved wavefields to reconstruct/backward wavefield
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
implicit none
- include "constants.h"
- include "OUTPUT_FILES/values_from_mesher.h"
-
- integer myrank
-
- ! backward/reconstructed wavefields
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE_ADJOINT) :: &
- b_displ_crust_mantle,b_veloc_crust_mantle,b_accel_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE_ADJOINT) :: &
- b_displ_outer_core,b_veloc_outer_core,b_accel_outer_core
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE_ADJOINT) :: &
- b_displ_inner_core,b_veloc_inner_core,b_accel_inner_core
-
- real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_AND_ATT) :: &
- b_R_memory_crust_mantle
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
- b_epsilondev_crust_mantle
-
- real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_AND_ATT) :: &
- b_R_memory_inner_core
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ADJOINT) :: &
- b_epsilondev_inner_core
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ROT_ADJOINT) :: &
- b_A_array_rotation,b_B_array_rotation
-
- character(len=150) LOCAL_PATH
-
!local parameters
character(len=150) outputname
write(outputname,'(a,i6.6,a)') 'proc',myrank,'_save_forward_arrays.bin'
open(unit=55,file=trim(LOCAL_PATH)//'/'//outputname,status='old',action='read',form='unformatted')
+
read(55) b_displ_crust_mantle
read(55) b_veloc_crust_mantle
read(55) b_accel_crust_mantle
@@ -234,16 +189,38 @@
read(55) b_displ_outer_core
read(55) b_veloc_outer_core
read(55) b_accel_outer_core
- read(55) b_epsilondev_crust_mantle
- read(55) b_epsilondev_inner_core
+
+ read(55) b_epsilondev_xx_crust_mantle
+ read(55) b_epsilondev_yy_crust_mantle
+ read(55) b_epsilondev_xy_crust_mantle
+ read(55) b_epsilondev_xz_crust_mantle
+ read(55) b_epsilondev_yz_crust_mantle
+
+ read(55) b_epsilondev_xx_inner_core
+ read(55) b_epsilondev_yy_inner_core
+ read(55) b_epsilondev_xy_inner_core
+ read(55) b_epsilondev_xz_inner_core
+ read(55) b_epsilondev_yz_inner_core
+
if (ROTATION_VAL) then
read(55) b_A_array_rotation
read(55) b_B_array_rotation
endif
if (ATTENUATION_VAL) then
- read(55) b_R_memory_crust_mantle
- read(55) b_R_memory_inner_core
+ read(55) b_R_xx_crust_mantle
+ read(55) b_R_yy_crust_mantle
+ read(55) b_R_xy_crust_mantle
+ read(55) b_R_xz_crust_mantle
+ read(55) b_R_yz_crust_mantle
+
+ read(55) b_R_xx_inner_core
+ read(55) b_R_yy_inner_core
+ read(55) b_R_xy_inner_core
+ read(55) b_R_xz_inner_core
+ read(55) b_R_yz_inner_core
+
endif
+
close(55)
end subroutine read_forward_arrays
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_mesh_databases.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_mesh_databases.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_mesh_databases.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -25,136 +25,97 @@
!
!=====================================================================
- subroutine read_mesh_databases(myrank,rho_vp_crust_mantle,rho_vs_crust_mantle, &
- xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
- rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
- kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- nspec_iso,nspec_tiso,nspec_ani, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle, &
- c14store_crust_mantle,c15store_crust_mantle,c16store_crust_mantle, &
- c22store_crust_mantle,c23store_crust_mantle,c24store_crust_mantle, &
- c25store_crust_mantle,c26store_crust_mantle,c33store_crust_mantle, &
- c34store_crust_mantle,c35store_crust_mantle,c36store_crust_mantle, &
- c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- ! -- idoubling_crust_mantle
- is_on_a_slice_edge_crust_mantle,rmass_crust_mantle,rmass_ocean_load, &
- vp_outer_core,xstore_outer_core,ystore_outer_core,zstore_outer_core, &
- xix_outer_core,xiy_outer_core,xiz_outer_core, &
- etax_outer_core,etay_outer_core,etaz_outer_core, &
- gammax_outer_core,gammay_outer_core,gammaz_outer_core, &
- rhostore_outer_core,kappavstore_outer_core, &
- ibool_outer_core,idoubling_outer_core,ispec_is_tiso_outer_core, &
- is_on_a_slice_edge_outer_core,rmass_outer_core, &
- xstore_inner_core,ystore_inner_core,zstore_inner_core, &
- xix_inner_core,xiy_inner_core,xiz_inner_core, &
- etax_inner_core,etay_inner_core,etaz_inner_core, &
- gammax_inner_core,gammay_inner_core,gammaz_inner_core, &
- rhostore_inner_core,kappavstore_inner_core,muvstore_inner_core, &
- c11store_inner_core,c12store_inner_core,c13store_inner_core, &
- c33store_inner_core,c44store_inner_core, &
- ibool_inner_core,idoubling_inner_core,ispec_is_tiso_inner_core, &
- is_on_a_slice_edge_inner_core,rmass_inner_core, &
- ABSORBING_CONDITIONS,LOCAL_PATH)
+ subroutine read_mesh_databases()
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
implicit none
- include "constants.h"
- include "OUTPUT_FILES/values_from_mesher.h"
+ include 'mpif.h'
+
+ ! local parameters
+ integer :: ier
- integer myrank
+ ! get MPI starting time
+ time_start = MPI_WTIME()
- ! Stacey
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STACEY) :: &
- rho_vp_crust_mantle,rho_vs_crust_mantle
+ ! allocates temporary arrays
+ allocate( is_on_a_slice_edge_crust_mantle(NSPEC_CRUST_MANTLE), &
+ is_on_a_slice_edge_inner_core(NSPEC_INNER_CORE), &
+ is_on_a_slice_edge_outer_core(NSPEC_OUTER_CORE), &
+ stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating temporary is_on_a_slice_edge arrays')
- ! mesh parameters
- real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE) :: &
- xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle,&
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ISO_MANTLE) :: &
- rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle
- ! arrays for anisotropic elements stored only where needed to save space
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_TISO_MANTLE) :: &
- kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle
+ ! start reading the databases
+ ! read arrays created by the mesher
- ! arrays for full anisotropy only when needed
- integer nspec_iso,nspec_tiso,nspec_ani
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ANISO_MANTLE) :: &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle, &
- c14store_crust_mantle,c15store_crust_mantle,c16store_crust_mantle, &
- c22store_crust_mantle,c23store_crust_mantle,c24store_crust_mantle, &
- c25store_crust_mantle,c26store_crust_mantle,c33store_crust_mantle, &
- c34store_crust_mantle,c35store_crust_mantle,c36store_crust_mantle, &
- c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle
+ ! reads "solver_data_1.bin" & "solver_data_2.bin" files for crust and mantle
+ call read_mesh_databases_CM()
+
+ ! reads "solver_data_1.bin" & "solver_data_2.bin" files for outer core
+ call read_mesh_databases_OC()
+
+ ! reads "solver_data_1.bin" & "solver_data_2.bin" files for inner core
+ call read_mesh_databases_IC()
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: ibool_crust_mantle
+ ! reads "boundary.bin" files to couple mantle with outer core and inner core boundaries
+ call read_mesh_databases_coupling()
+
+ ! reads "addressing.txt" 2-D addressing for summation between slices with MPI
+ call read_mesh_databases_addressing()
-! integer, dimension(NSPEC_CRUST_MANTLE) :: idoubling_crust_mantle
- logical, dimension(NSPEC_CRUST_MANTLE) :: ispec_is_tiso_crust_mantle
+ ! reads "iboolleft_..txt", "iboolright_..txt" (and "list_messages_..txt", "buffer_...txt") files and sets up MPI buffers
+ call read_mesh_databases_MPIbuffers()
- ! mass matrix
- real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE) :: rmass_crust_mantle
- ! additional mass matrix for ocean load
- real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE_OCEANS) :: rmass_ocean_load
+ ! sets up MPI interfaces
+ call read_mesh_databases_MPIinter()
- ! stacy outer core
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_STACEY) :: vp_outer_core
- ! mesh parameters
- real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE) :: &
- xstore_outer_core,ystore_outer_core,zstore_outer_core
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE) :: &
- xix_outer_core,xiy_outer_core,xiz_outer_core,&
- etax_outer_core,etay_outer_core,etaz_outer_core, &
- gammax_outer_core,gammay_outer_core,gammaz_outer_core
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE) :: &
- rhostore_outer_core,kappavstore_outer_core
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE) :: ibool_outer_core
- integer, dimension(NSPEC_OUTER_CORE) :: idoubling_outer_core
- logical, dimension(NSPEC_OUTER_CORE) :: ispec_is_tiso_outer_core
+ ! sets up inner/outer element arrays
+ call read_mesh_databases_InnerOuter()
- real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE) :: rmass_outer_core
+ ! absorbing boundaries
+ if(ABSORBING_CONDITIONS) then
+ ! reads "stacey.bin" files and sets up arrays for Stacey conditions
+ call read_mesh_databases_stacey()
+ endif
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: &
- xix_inner_core,xiy_inner_core,xiz_inner_core,&
- etax_inner_core,etay_inner_core,etaz_inner_core, &
- gammax_inner_core,gammay_inner_core,gammaz_inner_core, &
- rhostore_inner_core, kappavstore_inner_core,muvstore_inner_core
- real(kind=CUSTOM_REAL), dimension(NGLOB_INNER_CORE) :: &
- xstore_inner_core,ystore_inner_core,zstore_inner_core
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ANISO_IC) :: &
- c11store_inner_core,c33store_inner_core,c12store_inner_core, &
- c13store_inner_core,c44store_inner_core
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: ibool_inner_core
- integer, dimension(NSPEC_INNER_CORE) :: idoubling_inner_core
- logical, dimension(NSPEC_INNER_CORE) :: ispec_is_tiso_inner_core
+ ! user output
+ if( myrank == 0 ) then
+ ! elapsed time since beginning of mesh generation
+ tCPU = MPI_WTIME() - time_start
+ write(IMAIN,*)
+ write(IMAIN,*) 'Elapsed time for reading mesh in seconds = ',sngl(tCPU)
+ write(IMAIN,*)
+ endif
+
+ ! frees temporary allocated arrays
+ deallocate(is_on_a_slice_edge_crust_mantle, &
+ is_on_a_slice_edge_outer_core, &
+ is_on_a_slice_edge_inner_core)
- real(kind=CUSTOM_REAL), dimension(NGLOB_INNER_CORE) :: rmass_inner_core
+ end subroutine read_mesh_databases
- logical ABSORBING_CONDITIONS
- character(len=150) LOCAL_PATH
+!
+!-------------------------------------------------------------------------------------------------
+!
- !local parameters
- logical READ_KAPPA_MU,READ_TISO
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,1) :: dummy_array
- integer, dimension(NSPEC_CRUST_MANTLE) :: dummy_i
+ subroutine read_mesh_databases_CM()
-! this for non blocking MPI
- logical, dimension(NSPEC_CRUST_MANTLE) :: is_on_a_slice_edge_crust_mantle
- logical, dimension(NSPEC_OUTER_CORE) :: is_on_a_slice_edge_outer_core
- logical, dimension(NSPEC_INNER_CORE) :: is_on_a_slice_edge_inner_core
+! mesh for CRUST MANTLE region
- ! start reading the databases
- ! read arrays created by the mesher
+ use specfem_par
+ use specfem_par_crustmantle
+ implicit none
+ ! local parameters
+ integer :: nspec_iso,nspec_tiso,nspec_ani
+ logical :: READ_KAPPA_MU,READ_TISO
+
+ ! dummy array that does not need to be actually read
+ integer, dimension(NSPEC_CRUST_MANTLE) :: dummy_i
+
! crust and mantle
if(ANISOTROPIC_3D_MANTLE_VAL) then
READ_KAPPA_MU = .false.
@@ -173,6 +134,7 @@
READ_KAPPA_MU = .true.
READ_TISO = .true.
endif
+
call read_arrays_solver(IREGION_CRUST_MANTLE,myrank, &
rho_vp_crust_mantle,rho_vs_crust_mantle, &
xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
@@ -190,13 +152,42 @@
c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
ibool_crust_mantle,dummy_i, &
- ! --idoubling_crust_mantle,
ispec_is_tiso_crust_mantle, &
is_on_a_slice_edge_crust_mantle,rmass_crust_mantle,rmass_ocean_load, &
NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE, &
READ_KAPPA_MU,READ_TISO,TRANSVERSE_ISOTROPY_VAL,ANISOTROPIC_3D_MANTLE_VAL, &
ANISOTROPIC_INNER_CORE_VAL,OCEANS_VAL,LOCAL_PATH,ABSORBING_CONDITIONS)
+ ! check that the number of points in this slice is correct
+ if(minval(ibool_crust_mantle(:,:,:,:)) /= 1 .or. &
+ maxval(ibool_crust_mantle(:,:,:,:)) /= NGLOB_CRUST_MANTLE) &
+ call exit_MPI(myrank,'incorrect global numbering: iboolmax does not equal nglob in crust and mantle')
+
+
+ end subroutine read_mesh_databases_CM
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine read_mesh_databases_OC()
+
+! mesh for OUTER CORE region
+
+ use specfem_par
+ use specfem_par_outercore
+ implicit none
+
+ ! local parameters
+ integer :: nspec_iso,nspec_tiso,nspec_ani
+ logical :: READ_KAPPA_MU,READ_TISO
+
+ ! dummy array that does not need to be actually read
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,1) :: dummy_array
+
+ logical, dimension(:),allocatable:: dummy_ispec_is_tiso
+ integer, dimension(:),allocatable :: dummy_idoubling_outer_core
+
! outer core (no anisotropy nor S velocity)
! rmass_ocean_load is not used in this routine because it is meaningless in the outer core
READ_KAPPA_MU = .false.
@@ -204,6 +195,10 @@
nspec_iso = NSPEC_OUTER_CORE
nspec_tiso = 1
nspec_ani = 1
+
+ ! dummy allocation
+ allocate(dummy_ispec_is_tiso(NSPEC_OUTER_CORE))
+ allocate(dummy_idoubling_outer_core(NSPEC_OUTER_CORE))
call read_arrays_solver(IREGION_OUTER_CORE,myrank, &
vp_outer_core,dummy_array, &
@@ -221,12 +216,42 @@
dummy_array,dummy_array,dummy_array, &
dummy_array,dummy_array,dummy_array, &
dummy_array,dummy_array,dummy_array, &
- ibool_outer_core,idoubling_outer_core,ispec_is_tiso_outer_core, &
+ ibool_outer_core,dummy_idoubling_outer_core,dummy_ispec_is_tiso, &
is_on_a_slice_edge_outer_core,rmass_outer_core,rmass_ocean_load, &
NSPEC_OUTER_CORE,NGLOB_OUTER_CORE, &
READ_KAPPA_MU,READ_TISO,TRANSVERSE_ISOTROPY_VAL,ANISOTROPIC_3D_MANTLE_VAL, &
ANISOTROPIC_INNER_CORE_VAL,OCEANS_VAL,LOCAL_PATH,ABSORBING_CONDITIONS)
+ deallocate(dummy_idoubling_outer_core)
+ deallocate(dummy_ispec_is_tiso)
+
+ ! check that the number of points in this slice is correct
+ if(minval(ibool_outer_core(:,:,:,:)) /= 1 .or. &
+ maxval(ibool_outer_core(:,:,:,:)) /= NGLOB_OUTER_CORE) &
+ call exit_MPI(myrank,'incorrect global numbering: iboolmax does not equal nglob in outer core')
+
+ end subroutine read_mesh_databases_OC
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine read_mesh_databases_IC()
+
+! mesh for INNER CORE region
+
+ use specfem_par
+ use specfem_par_innercore
+ implicit none
+
+ ! local parameters
+ integer :: nspec_iso,nspec_tiso,nspec_ani
+ logical :: READ_KAPPA_MU,READ_TISO
+
+ ! dummy array that does not need to be actually read
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,1) :: dummy_array
+ logical, dimension(:),allocatable:: dummy_ispec_is_tiso
+
! inner core (no anisotropy)
! rmass_ocean_load is not used in this routine because it is meaningless in the inner core
READ_KAPPA_MU = .true.
@@ -238,7 +263,10 @@
else
nspec_ani = 1
endif
-
+
+ ! dummy allocation
+ allocate(dummy_ispec_is_tiso(NSPEC_INNER_CORE))
+
call read_arrays_solver(IREGION_INNER_CORE,myrank, &
dummy_array,dummy_array, &
xstore_inner_core,ystore_inner_core,zstore_inner_core, &
@@ -255,104 +283,193 @@
dummy_array,dummy_array,dummy_array, &
c44store_inner_core,dummy_array,dummy_array, &
dummy_array,dummy_array,dummy_array, &
- ibool_inner_core,idoubling_inner_core,ispec_is_tiso_inner_core, &
+ ibool_inner_core,idoubling_inner_core,dummy_ispec_is_tiso, &
is_on_a_slice_edge_inner_core,rmass_inner_core,rmass_ocean_load, &
NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
READ_KAPPA_MU,READ_TISO,TRANSVERSE_ISOTROPY_VAL,ANISOTROPIC_3D_MANTLE_VAL, &
ANISOTROPIC_INNER_CORE_VAL,OCEANS_VAL,LOCAL_PATH,ABSORBING_CONDITIONS)
+ deallocate(dummy_ispec_is_tiso)
+
! check that the number of points in this slice is correct
- if(minval(ibool_crust_mantle(:,:,:,:)) /= 1 .or. &
- maxval(ibool_crust_mantle(:,:,:,:)) /= NGLOB_CRUST_MANTLE) &
- call exit_MPI(myrank,'incorrect global numbering: iboolmax does not equal nglob in crust and mantle')
-
- if(minval(ibool_outer_core(:,:,:,:)) /= 1 .or. &
- maxval(ibool_outer_core(:,:,:,:)) /= NGLOB_OUTER_CORE) &
- call exit_MPI(myrank,'incorrect global numbering: iboolmax does not equal nglob in outer core')
-
if(minval(ibool_inner_core(:,:,:,:)) /= 1 .or. maxval(ibool_inner_core(:,:,:,:)) /= NGLOB_INNER_CORE) &
call exit_MPI(myrank,'incorrect global numbering: iboolmax does not equal nglob in inner core')
- end subroutine read_mesh_databases
+ end subroutine read_mesh_databases_IC
!
!-------------------------------------------------------------------------------------------------
!
+ subroutine read_mesh_databases_coupling()
- subroutine read_mesh_databases_addressing(myrank, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle, &
- iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,npoin2D_faces_crust_mantle, &
- iboolcorner_crust_mantle, &
- iboolleft_xi_outer_core,iboolright_xi_outer_core, &
- iboolleft_eta_outer_core,iboolright_eta_outer_core, &
- npoin2D_xi_outer_core,npoin2D_eta_outer_core,&
- iboolfaces_outer_core,npoin2D_faces_outer_core, &
- iboolcorner_outer_core, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core, &
- iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,npoin2D_faces_inner_core, &
- iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces,imsg_type, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- LOCAL_PATH,OUTPUT_FILES, &
- NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX,NGLOB1D_RADIAL, &
- NGLOB2DMAX_XY,NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
- addressing,ichunk_slice,iproc_xi_slice,iproc_eta_slice, &
- ichunk,iproc_xi,iproc_eta)
+! to couple mantle with outer core
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
+
implicit none
include 'mpif.h'
- include "constants.h"
- include "OUTPUT_FILES/values_from_mesher.h"
+
+ ! local parameters
+ integer njunk1,njunk2,njunk3
- integer myrank
+ ! crust and mantle
+ ! create name of database
+ call create_name_database(prname,myrank,IREGION_CRUST_MANTLE,LOCAL_PATH)
- ! 2-D addressing and buffers for summation between slices
- integer, dimension(NGLOB2DMAX_XMIN_XMAX_CM) :: iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle
- integer, dimension(NGLOB2DMAX_YMIN_YMAX_CM) :: iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle
- integer, dimension(NGLOB2DMAX_XMIN_XMAX_OC) :: iboolleft_xi_outer_core,iboolright_xi_outer_core
- integer, dimension(NGLOB2DMAX_YMIN_YMAX_OC) :: iboolleft_eta_outer_core,iboolright_eta_outer_core
- integer, dimension(NGLOB2DMAX_XMIN_XMAX_IC) :: iboolleft_xi_inner_core,iboolright_xi_inner_core
- integer, dimension(NGLOB2DMAX_YMIN_YMAX_IC) :: iboolleft_eta_inner_core,iboolright_eta_inner_core
+ ! Stacey put back
+ open(unit=27,file=prname(1:len_trim(prname))//'boundary.bin', &
+ status='old',form='unformatted',action='read')
+ read(27) nspec2D_xmin_crust_mantle
+ read(27) nspec2D_xmax_crust_mantle
+ read(27) nspec2D_ymin_crust_mantle
+ read(27) nspec2D_ymax_crust_mantle
+ read(27) njunk1
+ read(27) njunk2
- integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle
- integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_outer_core,npoin2D_eta_outer_core
- integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_inner_core,npoin2D_eta_inner_core
+! boundary parameters
+ read(27) ibelm_xmin_crust_mantle
+ read(27) ibelm_xmax_crust_mantle
+ read(27) ibelm_ymin_crust_mantle
+ read(27) ibelm_ymax_crust_mantle
+ read(27) ibelm_bottom_crust_mantle
+ read(27) ibelm_top_crust_mantle
- integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED) :: iboolfaces_crust_mantle, &
- iboolfaces_outer_core,iboolfaces_inner_core
+ read(27) normal_xmin_crust_mantle
+ read(27) normal_xmax_crust_mantle
+ read(27) normal_ymin_crust_mantle
+ read(27) normal_ymax_crust_mantle
+ read(27) normal_bottom_crust_mantle
+ read(27) normal_top_crust_mantle
- integer npoin2D_faces_crust_mantle(NUMFACES_SHARED)
- integer npoin2D_faces_outer_core(NUMFACES_SHARED)
- integer npoin2D_faces_inner_core(NUMFACES_SHARED)
+ read(27) jacobian2D_xmin_crust_mantle
+ read(27) jacobian2D_xmax_crust_mantle
+ read(27) jacobian2D_ymin_crust_mantle
+ read(27) jacobian2D_ymax_crust_mantle
+ read(27) jacobian2D_bottom_crust_mantle
+ read(27) jacobian2D_top_crust_mantle
+ close(27)
- ! indirect addressing for each corner of the chunks
- integer, dimension(NGLOB1D_RADIAL_CM,NUMCORNERS_SHARED) :: iboolcorner_crust_mantle
- integer, dimension(NGLOB1D_RADIAL_OC,NUMCORNERS_SHARED) :: iboolcorner_outer_core
- integer, dimension(NGLOB1D_RADIAL_IC,NUMCORNERS_SHARED) :: iboolcorner_inner_core
- ! communication pattern for faces between chunks
- integer, dimension(NUMMSGS_FACES_VAL) :: iprocfrom_faces,iprocto_faces,imsg_type
- ! communication pattern for corners between chunks
- integer, dimension(NCORNERSCHUNKS_VAL) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
+ ! read parameters to couple fluid and solid regions
+ !
+ ! outer core
- character(len=150) LOCAL_PATH,OUTPUT_FILES
+ ! create name of database
+ call create_name_database(prname,myrank,IREGION_OUTER_CORE,LOCAL_PATH)
- integer, dimension(MAX_NUM_REGIONS) :: NGLOB1D_RADIAL,NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX
- integer NGLOB2DMAX_XY
+ ! boundary parameters
- integer NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS
+ ! Stacey put back
+ open(unit=27,file=prname(1:len_trim(prname))//'boundary.bin', &
+ status='old',form='unformatted',action='read')
+ read(27) nspec2D_xmin_outer_core
+ read(27) nspec2D_xmax_outer_core
+ read(27) nspec2D_ymin_outer_core
+ read(27) nspec2D_ymax_outer_core
+ read(27) njunk1
+ read(27) njunk2
- ! for addressing of the slices
- integer, dimension(NCHUNKS_VAL,0:NPROC_XI_VAL-1,0:NPROC_ETA_VAL-1) :: addressing
- integer, dimension(0:NPROCTOT_VAL-1) :: ichunk_slice,iproc_xi_slice,iproc_eta_slice
- integer ichunk,iproc_xi,iproc_eta
+ read(27) ibelm_xmin_outer_core
+ read(27) ibelm_xmax_outer_core
+ read(27) ibelm_ymin_outer_core
+ read(27) ibelm_ymax_outer_core
+ read(27) ibelm_bottom_outer_core
+ read(27) ibelm_top_outer_core
+ read(27) normal_xmin_outer_core
+ read(27) normal_xmax_outer_core
+ read(27) normal_ymin_outer_core
+ read(27) normal_ymax_outer_core
+ read(27) normal_bottom_outer_core
+ read(27) normal_top_outer_core
+
+ read(27) jacobian2D_xmin_outer_core
+ read(27) jacobian2D_xmax_outer_core
+ read(27) jacobian2D_ymin_outer_core
+ read(27) jacobian2D_ymax_outer_core
+ read(27) jacobian2D_bottom_outer_core
+ read(27) jacobian2D_top_outer_core
+ close(27)
+
+
+ !
+ ! inner core
+ !
+
+ ! create name of database
+ call create_name_database(prname,myrank,IREGION_INNER_CORE,LOCAL_PATH)
+
+ ! read info for vertical edges for central cube matching in inner core
+ open(unit=27,file=prname(1:len_trim(prname))//'boundary.bin', &
+ status='old',form='unformatted',action='read')
+ read(27) nspec2D_xmin_inner_core
+ read(27) nspec2D_xmax_inner_core
+ read(27) nspec2D_ymin_inner_core
+ read(27) nspec2D_ymax_inner_core
+ read(27) njunk1
+ read(27) njunk2
+
+ ! boundary parameters
+ read(27) ibelm_xmin_inner_core
+ read(27) ibelm_xmax_inner_core
+ read(27) ibelm_ymin_inner_core
+ read(27) ibelm_ymax_inner_core
+ read(27) ibelm_bottom_inner_core
+ read(27) ibelm_top_inner_core
+ close(27)
+
+
+ ! -- Boundary Mesh for crust and mantle ---
+ if (SAVE_BOUNDARY_MESH .and. SIMULATION_TYPE == 3) then
+
+ call create_name_database(prname,myrank,IREGION_CRUST_MANTLE,LOCAL_PATH)
+
+ open(unit=27,file=prname(1:len_trim(prname))//'boundary_disc.bin', &
+ status='old',form='unformatted',action='read')
+ read(27) njunk1,njunk2,njunk3
+ if (njunk1 /= NSPEC2D_MOHO .and. njunk2 /= NSPEC2D_400 .and. njunk3 /= NSPEC2D_670) &
+ call exit_mpi(myrank, 'Error reading ibelm_disc.bin file')
+ read(27) ibelm_moho_top
+ read(27) ibelm_moho_bot
+ read(27) ibelm_400_top
+ read(27) ibelm_400_bot
+ read(27) ibelm_670_top
+ read(27) ibelm_670_bot
+ read(27) normal_moho
+ read(27) normal_400
+ read(27) normal_670
+ close(27)
+
+ k_top = 1
+ k_bot = NGLLZ
+
+ ! initialization
+ moho_kl = 0.; d400_kl = 0.; d670_kl = 0.; cmb_kl = 0.; icb_kl = 0.
+ endif
+
+ end subroutine read_mesh_databases_coupling
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+ subroutine read_mesh_databases_addressing()
+
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
+
+ implicit none
+
+ include 'mpif.h'
+
! local parameters
integer :: ier,iproc,iproc_read
integer :: NUM_FACES,NPROC_ONE_DIRECTION
@@ -462,12 +579,32 @@
! debug checks with compiled value
!if( NUMMSGS_FACES /= NUMMSGS_FACES_VAL ) then
! print*,'check: NUMMSGS_FACES',NUMMSGS_FACES,NUMMSGS_FACES_VAL
- ! stop 'error NUMMSGS_FACES_VAL, please recompile solver'
+ ! call exit_mpi(myrank,'error NUMMSGS_FACES_VAL, please recompile solver')
!endif
+ end subroutine read_mesh_databases_addressing
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine read_mesh_databases_MPIbuffers()
+
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
+ implicit none
+
+ ! local parameters
+ integer :: ier
+ character(len=150) :: filename
+
! read 2-D addressing for summation between slices with MPI
! mantle and crust
+ if(myrank == 0) write(IMAIN,*) 'crust/mantle region:'
+
call read_arrays_buffers_solver(IREGION_CRUST_MANTLE,myrank,iboolleft_xi_crust_mantle, &
iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
@@ -480,6 +617,8 @@
NUMMSGS_FACES,NCORNERSCHUNKS,NPROCTOT_VAL,NPROC_XI_VAL,NPROC_ETA_VAL,LOCAL_PATH,NCHUNKS_VAL)
! outer core
+ if(myrank == 0) write(IMAIN,*) 'outer core region:'
+
call read_arrays_buffers_solver(IREGION_OUTER_CORE,myrank, &
iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
@@ -492,6 +631,8 @@
NUMMSGS_FACES,NCORNERSCHUNKS,NPROCTOT_VAL,NPROC_XI_VAL,NPROC_ETA_VAL,LOCAL_PATH,NCHUNKS_VAL)
! inner core
+ if(myrank == 0) write(IMAIN,*) 'inner core region:'
+
call read_arrays_buffers_solver(IREGION_INNER_CORE,myrank, &
iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
@@ -503,322 +644,1326 @@
NGLOB2DMAX_YMIN_YMAX(IREGION_INNER_CORE),NGLOB2DMAX_XY,NGLOB1D_RADIAL(IREGION_INNER_CORE), &
NUMMSGS_FACES,NCORNERSCHUNKS,NPROCTOT_VAL,NPROC_XI_VAL,NPROC_ETA_VAL,LOCAL_PATH,NCHUNKS_VAL)
+ ! added this to reduce the size of the buffers
+ ! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
+ npoin2D_max_all_CM_IC = max(maxval(npoin2D_xi_crust_mantle(:) + npoin2D_xi_inner_core(:)), &
+ maxval(npoin2D_eta_crust_mantle(:) + npoin2D_eta_inner_core(:)))
- end subroutine read_mesh_databases_addressing
+ allocate(buffer_send_faces(NDIM,npoin2D_max_all_CM_IC,NUMFACES_SHARED), &
+ buffer_received_faces(NDIM,npoin2D_max_all_CM_IC,NUMFACES_SHARED),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating mpi buffer')
+ allocate(b_buffer_send_faces(NDIM,npoin2D_max_all_CM_IC,NUMFACES_SHARED), &
+ b_buffer_received_faces(NDIM,npoin2D_max_all_CM_IC,NUMFACES_SHARED),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating mpi b_buffer')
+ ! central cube buffers
+ if(INCLUDE_CENTRAL_CUBE) then
+
+ if(myrank == 0) write(IMAIN,*) 'including central cube'
+
+ ! compute number of messages to expect in cube as well as their size
+ call comp_central_cube_buffer_size(iproc_xi,iproc_eta,ichunk, &
+ NPROC_XI_VAL,NPROC_ETA_VAL,NSPEC2D_BOTTOM(IREGION_INNER_CORE), &
+ nb_msgs_theor_in_cube,npoin2D_cube_from_slices)
+
+ ! this value is used for dynamic memory allocation, therefore make sure it is never zero
+ if(nb_msgs_theor_in_cube > 0) then
+ non_zero_nb_msgs_theor_in_cube = nb_msgs_theor_in_cube
+ else
+ non_zero_nb_msgs_theor_in_cube = 1
+ endif
+
+ ! allocate buffers for cube and slices
+ allocate(sender_from_slices_to_cube(non_zero_nb_msgs_theor_in_cube), &
+ buffer_all_cube_from_slices(non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices,NDIM), &
+ b_buffer_all_cube_from_slices(non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices,NDIM), &
+ buffer_slices(npoin2D_cube_from_slices,NDIM), &
+ b_buffer_slices(npoin2D_cube_from_slices,NDIM), &
+ buffer_slices2(npoin2D_cube_from_slices,NDIM), &
+ ibool_central_cube(non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating cube buffers')
+
+ ! handles the communications with the central cube if it was included in the mesh
+ ! create buffers to assemble with the central cube
+ call create_central_cube_buffers(myrank,iproc_xi,iproc_eta,ichunk, &
+ NPROC_XI_VAL,NPROC_ETA_VAL,NCHUNKS_VAL,NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
+ NSPEC2DMAX_XMIN_XMAX(IREGION_INNER_CORE),NSPEC2DMAX_YMIN_YMAX(IREGION_INNER_CORE), &
+ NSPEC2D_BOTTOM(IREGION_INNER_CORE), &
+ addressing,ibool_inner_core,idoubling_inner_core, &
+ xstore_inner_core,ystore_inner_core,zstore_inner_core, &
+ nspec2D_xmin_inner_core,nspec2D_xmax_inner_core, &
+ nspec2D_ymin_inner_core,nspec2D_ymax_inner_core, &
+ ibelm_xmin_inner_core,ibelm_xmax_inner_core, &
+ ibelm_ymin_inner_core,ibelm_ymax_inner_core,ibelm_bottom_inner_core, &
+ nb_msgs_theor_in_cube,non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices, &
+ receiver_cube_from_slices,sender_from_slices_to_cube,ibool_central_cube, &
+ buffer_slices,buffer_slices2,buffer_all_cube_from_slices)
+
+ if(myrank == 0) write(IMAIN,*) ''
+
+ else
+
+ ! allocate fictitious buffers for cube and slices with a dummy size
+ ! just to be able to use them as arguments in subroutine calls
+ allocate(sender_from_slices_to_cube(1), &
+ buffer_all_cube_from_slices(1,1,1), &
+ b_buffer_all_cube_from_slices(1,1,1), &
+ buffer_slices(1,1), &
+ b_buffer_slices(1,1), &
+ buffer_slices2(1,1), &
+ ibool_central_cube(1,1),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating dummy buffers')
+
+ endif
+
+ ! note: fix_... routines below update is_on_a_slice_edge_.. arrays:
+ ! assign flags for each element which is on a rim of the slice
+ ! thus, they include elements on top and bottom not shared with other MPI partitions
+ !
+ ! we will re-set these flags when setting up inner/outer elements, but will
+ ! use these arrays for now as initial guess for the search for elements which share a global point
+ ! between different MPI processes
+ call fix_non_blocking_slices(is_on_a_slice_edge_crust_mantle,iboolright_xi_crust_mantle, &
+ iboolleft_xi_crust_mantle,iboolright_eta_crust_mantle,iboolleft_eta_crust_mantle, &
+ npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle,ibool_crust_mantle, &
+ mask_ibool,NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE,NGLOB2DMAX_XMIN_XMAX_CM,NGLOB2DMAX_YMIN_YMAX_CM)
+
+ call fix_non_blocking_slices(is_on_a_slice_edge_outer_core,iboolright_xi_outer_core, &
+ iboolleft_xi_outer_core,iboolright_eta_outer_core,iboolleft_eta_outer_core, &
+ npoin2D_xi_outer_core,npoin2D_eta_outer_core,ibool_outer_core, &
+ mask_ibool,NSPEC_OUTER_CORE,NGLOB_OUTER_CORE,NGLOB2DMAX_XMIN_XMAX_OC,NGLOB2DMAX_YMIN_YMAX_OC)
+
+ call fix_non_blocking_slices(is_on_a_slice_edge_inner_core,iboolright_xi_inner_core, &
+ iboolleft_xi_inner_core,iboolright_eta_inner_core,iboolleft_eta_inner_core, &
+ npoin2D_xi_inner_core,npoin2D_eta_inner_core,ibool_inner_core, &
+ mask_ibool,NSPEC_INNER_CORE,NGLOB_INNER_CORE,NGLOB2DMAX_XMIN_XMAX_IC,NGLOB2DMAX_YMIN_YMAX_IC)
+
+ if(INCLUDE_CENTRAL_CUBE) then
+ ! updates flags for elements on slice boundaries
+ call fix_non_blocking_central_cube(is_on_a_slice_edge_inner_core, &
+ ibool_inner_core,NSPEC_INNER_CORE,NGLOB_INNER_CORE,nb_msgs_theor_in_cube,ibelm_bottom_inner_core, &
+ idoubling_inner_core,npoin2D_cube_from_slices,ibool_central_cube, &
+ NSPEC2D_BOTTOM(IREGION_INNER_CORE),ichunk)
+ endif
+
+ ! debug: saves element flags
+ ! crust mantle
+ !write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_is_on_a_slice_edge_crust_mantle_proc',myrank
+ !call write_VTK_data_elem_l(NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE, &
+ ! xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
+ ! ibool_crust_mantle, &
+ ! is_on_a_slice_edge_crust_mantle,filename)
+ ! outer core
+ !write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_is_on_a_slice_edge_outer_core_proc',myrank
+ !call write_VTK_data_elem_l(NSPEC_OUTER_CORE,NGLOB_OUTER_CORE, &
+ ! xstore_outer_core,ystore_outer_core,zstore_outer_core, &
+ ! ibool_outer_core, &
+ ! is_on_a_slice_edge_outer_core,filename)
+!daniel
+ ! inner core
+ write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_is_on_a_slice_edge_inner_core_proc',myrank
+ call write_VTK_data_elem_l(NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
+ xstore_inner_core,ystore_inner_core,zstore_inner_core, &
+ ibool_inner_core, &
+ is_on_a_slice_edge_inner_core,filename)
+
+ end subroutine read_mesh_databases_MPIbuffers
+
+
!
!-------------------------------------------------------------------------------------------------
!
- subroutine read_mesh_databases_coupling(myrank, &
- nspec2D_xmin_crust_mantle,nspec2D_xmax_crust_mantle, &
- nspec2D_ymin_crust_mantle,nspec2D_ymax_crust_mantle, &
- ibelm_xmin_crust_mantle,ibelm_xmax_crust_mantle,ibelm_ymin_crust_mantle, &
- ibelm_ymax_crust_mantle,ibelm_bottom_crust_mantle,ibelm_top_crust_mantle, &
- normal_xmin_crust_mantle,normal_xmax_crust_mantle,normal_ymin_crust_mantle, &
- normal_ymax_crust_mantle,normal_bottom_crust_mantle,normal_top_crust_mantle, &
- jacobian2D_xmin_crust_mantle,jacobian2D_xmax_crust_mantle,jacobian2D_ymin_crust_mantle, &
- jacobian2D_ymax_crust_mantle,jacobian2D_bottom_crust_mantle,jacobian2D_top_crust_mantle, &
- nspec2D_xmin_outer_core,nspec2D_xmax_outer_core, &
- nspec2D_ymin_outer_core,nspec2D_ymax_outer_core, &
- ibelm_xmin_outer_core,ibelm_xmax_outer_core,ibelm_ymin_outer_core, &
- ibelm_ymax_outer_core,ibelm_bottom_outer_core,ibelm_top_outer_core, &
- normal_xmin_outer_core,normal_xmax_outer_core,normal_ymin_outer_core, &
- normal_ymax_outer_core,normal_bottom_outer_core,normal_top_outer_core, &
- jacobian2D_xmin_outer_core,jacobian2D_xmax_outer_core,jacobian2D_ymin_outer_core, &
- jacobian2D_ymax_outer_core,jacobian2D_bottom_outer_core,jacobian2D_top_outer_core, &
- nspec2D_xmin_inner_core,nspec2D_xmax_inner_core, &
- nspec2D_ymin_inner_core,nspec2D_ymax_inner_core, &
- ibelm_xmin_inner_core,ibelm_xmax_inner_core,ibelm_ymin_inner_core, &
- ibelm_ymax_inner_core,ibelm_bottom_inner_core,ibelm_top_inner_core, &
- ibelm_moho_top,ibelm_moho_bot,ibelm_400_top,ibelm_400_bot, &
- ibelm_670_top,ibelm_670_bot,normal_moho,normal_400,normal_670, &
- k_top,k_bot,moho_kl,d400_kl,d670_kl,cmb_kl,icb_kl, &
- LOCAL_PATH,SIMULATION_TYPE)
+ subroutine read_mesh_databases_MPIinter()
-! to couple mantle with outer core
+! sets up interfaces between MPI processes
+
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
implicit none
include 'mpif.h'
- include "constants.h"
- include "OUTPUT_FILES/values_from_mesher.h"
+
+ ! local parameters
+ integer :: ier,ndim_assemble
+ character(len=150) :: filename
- integer myrank
+ ! temporary buffers for send and receive between faces of the slices and the chunks
+ real(kind=CUSTOM_REAL), dimension(npoin2D_max_all_CM_IC) :: &
+ buffer_send_faces_scalar,buffer_received_faces_scalar
+
+ ! assigns initial maximum arrays
+ ! for global slices, the maximum number of neighbours is around 17 (8 horizontal, max of 8 on bottom, plus corner-chunk messages)
+ integer, parameter :: MAX_NEIGHBOURS = 8 + NCORNERSCHUNKS_VAL
+ integer, dimension(MAX_NEIGHBOURS) :: my_neighbours,nibool_neighbours
+ integer, dimension(:,:),allocatable :: ibool_neighbours
+ integer :: max_nibool
+ real(kind=CUSTOM_REAL),dimension(:),allocatable :: test_flag
+ real(kind=CUSTOM_REAL),dimension(:),allocatable :: test_flag_cc
+ integer :: i,j,k,ispec,iglob
+
+ ! estimates initial maximum ibool array
+ max_nibool = npoin2D_max_all_CM_IC * NUMFACES_SHARED &
+ + non_zero_nb_msgs_theor_in_cube*npoin2D_cube_from_slices
- ! for crust/oceans coupling
- integer nspec2D_xmin_crust_mantle,nspec2D_xmax_crust_mantle, &
- nspec2D_ymin_crust_mantle,nspec2D_ymax_crust_mantle
- integer, dimension(NSPEC2DMAX_XMIN_XMAX_CM) :: ibelm_xmin_crust_mantle,ibelm_xmax_crust_mantle
- integer, dimension(NSPEC2DMAX_YMIN_YMAX_CM) :: ibelm_ymin_crust_mantle,ibelm_ymax_crust_mantle
- integer, dimension(NSPEC2D_BOTTOM_CM) :: ibelm_bottom_crust_mantle
- integer, dimension(NSPEC2D_TOP_CM) :: ibelm_top_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_BOTTOM_CM) :: jacobian2D_bottom_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_TOP_CM) :: jacobian2D_top_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ,NSPEC2DMAX_XMIN_XMAX_CM) :: &
- jacobian2D_xmin_crust_mantle,jacobian2D_xmax_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ,NSPEC2DMAX_YMIN_YMAX_CM) :: &
- jacobian2D_ymin_crust_mantle,jacobian2D_ymax_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLY,NGLLZ,NSPEC2DMAX_XMIN_XMAX_CM) :: &
- normal_xmin_crust_mantle,normal_xmax_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2DMAX_YMIN_YMAX_CM) :: &
- normal_ymin_crust_mantle,normal_ymax_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_BOTTOM_CM) :: normal_bottom_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_TOP_CM) :: normal_top_crust_mantle
+ allocate(ibool_neighbours(max_nibool,MAX_NEIGHBOURS), stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating ibool_neighbours')
- ! arrays to couple with the fluid regions by pointwise matching
- integer nspec2D_xmin_outer_core,nspec2D_xmax_outer_core, &
- nspec2D_ymin_outer_core,nspec2D_ymax_outer_core
- integer, dimension(NSPEC2DMAX_XMIN_XMAX_OC) :: ibelm_xmin_outer_core,ibelm_xmax_outer_core
- integer, dimension(NSPEC2DMAX_YMIN_YMAX_OC) :: ibelm_ymin_outer_core,ibelm_ymax_outer_core
- integer, dimension(NSPEC2D_BOTTOM_OC) :: ibelm_bottom_outer_core
- integer, dimension(NSPEC2D_TOP_OC) :: ibelm_top_outer_core
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_BOTTOM_OC) :: jacobian2D_bottom_outer_core
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_TOP_OC) :: jacobian2D_top_outer_core
- real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ,NSPEC2DMAX_XMIN_XMAX_OC) :: &
- jacobian2D_xmin_outer_core,jacobian2D_xmax_outer_core
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ,NSPEC2DMAX_YMIN_YMAX_OC) :: &
- jacobian2D_ymin_outer_core,jacobian2D_ymax_outer_core
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLY,NGLLZ,NSPEC2DMAX_XMIN_XMAX_OC) :: &
- normal_xmin_outer_core,normal_xmax_outer_core
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLZ,NSPEC2DMAX_YMIN_YMAX_OC) :: &
- normal_ymin_outer_core,normal_ymax_outer_core
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_BOTTOM_OC) :: normal_bottom_outer_core
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_TOP_OC) :: normal_top_outer_core
- ! inner core
- integer nspec2D_xmin_inner_core,nspec2D_xmax_inner_core, &
- nspec2D_ymin_inner_core,nspec2D_ymax_inner_core
- integer, dimension(NSPEC2DMAX_XMIN_XMAX_IC) :: ibelm_xmin_inner_core,ibelm_xmax_inner_core
- integer, dimension(NSPEC2DMAX_YMIN_YMAX_IC) :: ibelm_ymin_inner_core,ibelm_ymax_inner_core
- integer, dimension(NSPEC2D_BOTTOM_IC) :: ibelm_bottom_inner_core
- integer, dimension(NSPEC2D_TOP_IC) :: ibelm_top_inner_core
+! sets up MPI interfaces
+! crust mantle region
+ if( myrank == 0 ) write(IMAIN,*) 'crust mantle mpi:'
+ allocate(test_flag(NGLOB_CRUST_MANTLE), &
+ stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating test_flag')
+
+ ! sets flag to rank id (+1 to avoid problems with zero rank)
+ test_flag(:) = myrank + 1.0
- ! boundary
- integer, dimension(NSPEC2D_MOHO) :: ibelm_moho_top,ibelm_moho_bot
- integer, dimension(NSPEC2D_400) :: ibelm_400_top,ibelm_400_bot
- integer, dimension(NSPEC2D_670) :: ibelm_670_top,ibelm_670_bot
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_MOHO) :: normal_moho
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_400) :: normal_400
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_670) :: normal_670
+ ! assembles values
+ call assemble_MPI_scalar_block(myrank,test_flag, &
+ NGLOB_CRUST_MANTLE, &
+ iproc_xi,iproc_eta,ichunk,addressing, &
+ iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+ npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+ iprocfrom_faces,iprocto_faces,imsg_type, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ buffer_send_faces_scalar,buffer_received_faces_scalar,npoin2D_max_all_CM_IC, &
+ buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
+ NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
+ NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL(IREGION_CRUST_MANTLE), &
+ NGLOB2DMAX_XMIN_XMAX(IREGION_CRUST_MANTLE),NGLOB2DMAX_YMIN_YMAX(IREGION_CRUST_MANTLE),NGLOB2DMAX_XY,NCHUNKS_VAL)
+
+ ! removes own myrank id (+1)
+ test_flag(:) = test_flag(:) - ( myrank + 1.0)
- integer k_top,k_bot
+ ! debug: saves array
+ !write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_test_flag_crust_mantle_proc',myrank
+ !call write_VTK_glob_points(NGLOB_CRUST_MANTLE, &
+ ! xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
+ ! test_flag,filename)
+
+ ! determines neighbor rank for shared faces
+ call rmd_get_MPI_interfaces(myrank,NGLOB_CRUST_MANTLE,NSPEC_CRUST_MANTLE, &
+ test_flag,my_neighbours,nibool_neighbours,ibool_neighbours, &
+ num_interfaces_crust_mantle,max_nibool_interfaces_crust_mantle, &
+ max_nibool,MAX_NEIGHBOURS, &
+ ibool_crust_mantle,&
+ is_on_a_slice_edge_crust_mantle, &
+ IREGION_CRUST_MANTLE,.false.)
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_MOHO) :: moho_kl
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_400) :: d400_kl
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_670) :: d670_kl
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_CMB) :: cmb_kl
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_ICB) :: icb_kl
+ deallocate(test_flag)
+
+ ! stores MPI interface information
+ allocate(my_neighbours_crust_mantle(num_interfaces_crust_mantle), &
+ nibool_interfaces_crust_mantle(num_interfaces_crust_mantle), &
+ stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating array my_neighbours_crust_mantle etc.')
- character(len=150) LOCAL_PATH
- integer SIMULATION_TYPE
+ ! copies interfaces arrays
+ if( num_interfaces_crust_mantle > 0 ) then
+ allocate(ibool_interfaces_crust_mantle(max_nibool_interfaces_crust_mantle,num_interfaces_crust_mantle), &
+ stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating array ibool_interfaces_crust_mantle')
- ! local parameters
- integer njunk1,njunk2,njunk3
- character(len=150) prname
+ ! ranks of neighbour processes
+ my_neighbours_crust_mantle(:) = my_neighbours(1:num_interfaces_crust_mantle)
+ ! number of global ibool entries on each interface
+ nibool_interfaces_crust_mantle(:) = nibool_neighbours(1:num_interfaces_crust_mantle)
+ ! global iglob point ids on each interface
+ ibool_interfaces_crust_mantle(:,:) = ibool_neighbours(1:max_nibool_interfaces_crust_mantle,1:num_interfaces_crust_mantle)
+ else
+ ! dummy allocation (fortran90 should allow allocate statement with zero array size)
+ max_nibool_interfaces_crust_mantle = 0
+ allocate(ibool_interfaces_crust_mantle(0,0),stat=ier)
+ endif
+
+ ! debug: saves 1. MPI interface
+ !if( num_interfaces_crust_mantle >= 1 ) then
+ ! write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_1_points_crust_mantle_proc',myrank
+ ! call write_VTK_data_points(NGLOB_CRUST_MANTLE, &
+ ! xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
+ ! ibool_interfaces_crust_mantle(1:nibool_interfaces_crust_mantle(1),1), &
+ ! nibool_interfaces_crust_mantle(1),filename)
+ !endif
- ! crust and mantle
- ! create name of database
- call create_name_database(prname,myrank,IREGION_CRUST_MANTLE,LOCAL_PATH)
+! outer core region
+ if( myrank == 0 ) write(IMAIN,*) 'outer core mpi:'
- ! Stacey put back
- open(unit=27,file=prname(1:len_trim(prname))//'boundary.bin', &
- status='old',form='unformatted',action='read')
- read(27) nspec2D_xmin_crust_mantle
- read(27) nspec2D_xmax_crust_mantle
- read(27) nspec2D_ymin_crust_mantle
- read(27) nspec2D_ymax_crust_mantle
- read(27) njunk1
- read(27) njunk2
+ allocate(test_flag(NGLOB_OUTER_CORE), &
+ stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating test_flag outer core')
+
+ ! sets flag to rank id (+1 to avoid problems with zero rank)
+ test_flag(:) = myrank + 1.0
-! boundary parameters
- read(27) ibelm_xmin_crust_mantle
- read(27) ibelm_xmax_crust_mantle
- read(27) ibelm_ymin_crust_mantle
- read(27) ibelm_ymax_crust_mantle
- read(27) ibelm_bottom_crust_mantle
- read(27) ibelm_top_crust_mantle
+ ! assembles values
+ call assemble_MPI_scalar_block(myrank,test_flag, &
+ NGLOB_OUTER_CORE, &
+ iproc_xi,iproc_eta,ichunk,addressing, &
+ iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
+ npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
+ iboolfaces_outer_core,iboolcorner_outer_core, &
+ iprocfrom_faces,iprocto_faces,imsg_type, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ buffer_send_faces_scalar,buffer_received_faces_scalar,npoin2D_max_all_CM_IC, &
+ buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
+ NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
+ NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL(IREGION_OUTER_CORE), &
+ NGLOB2DMAX_XMIN_XMAX(IREGION_OUTER_CORE),NGLOB2DMAX_YMIN_YMAX(IREGION_OUTER_CORE),NGLOB2DMAX_XY,NCHUNKS_VAL)
- read(27) normal_xmin_crust_mantle
- read(27) normal_xmax_crust_mantle
- read(27) normal_ymin_crust_mantle
- read(27) normal_ymax_crust_mantle
- read(27) normal_bottom_crust_mantle
- read(27) normal_top_crust_mantle
+
+ ! removes own myrank id (+1)
+ test_flag(:) = test_flag(:) - ( myrank + 1.0)
- read(27) jacobian2D_xmin_crust_mantle
- read(27) jacobian2D_xmax_crust_mantle
- read(27) jacobian2D_ymin_crust_mantle
- read(27) jacobian2D_ymax_crust_mantle
- read(27) jacobian2D_bottom_crust_mantle
- read(27) jacobian2D_top_crust_mantle
- close(27)
+ ! debug: saves array
+ !write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_test_flag_outer_core_proc',myrank
+ !call write_VTK_glob_points(NGLOB_OUTER_CORE, &
+ ! xstore_outer_core,ystore_outer_core,zstore_outer_core, &
+ ! test_flag,filename)
+
+ ! determines neighbor rank for shared faces
+ call rmd_get_MPI_interfaces(myrank,NGLOB_OUTER_CORE,NSPEC_OUTER_CORE, &
+ test_flag,my_neighbours,nibool_neighbours,ibool_neighbours, &
+ num_interfaces_outer_core,max_nibool_interfaces_outer_core, &
+ max_nibool,MAX_NEIGHBOURS, &
+ ibool_outer_core,&
+ is_on_a_slice_edge_outer_core, &
+ IREGION_OUTER_CORE,.false.)
- ! read parameters to couple fluid and solid regions
- !
- ! outer core
+ deallocate(test_flag)
+
+ ! stores MPI interface information
+ allocate(my_neighbours_outer_core(num_interfaces_outer_core), &
+ nibool_interfaces_outer_core(num_interfaces_outer_core), &
+ stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating array my_neighbours_outer_core etc.')
- ! create name of database
- call create_name_database(prname,myrank,IREGION_OUTER_CORE,LOCAL_PATH)
+ ! copies interfaces arrays
+ if( num_interfaces_outer_core > 0 ) then
+ allocate(ibool_interfaces_outer_core(max_nibool_interfaces_outer_core,num_interfaces_outer_core), &
+ stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating array ibool_interfaces_outer_core')
- ! boundary parameters
+ ! ranks of neighbour processes
+ my_neighbours_outer_core(:) = my_neighbours(1:num_interfaces_outer_core)
+ ! number of global ibool entries on each interface
+ nibool_interfaces_outer_core(:) = nibool_neighbours(1:num_interfaces_outer_core)
+ ! global iglob point ids on each interface
+ ibool_interfaces_outer_core(:,:) = ibool_neighbours(1:max_nibool_interfaces_outer_core,1:num_interfaces_outer_core)
+ else
+ ! dummy allocation (fortran90 should allow allocate statement with zero array size)
+ max_nibool_interfaces_outer_core = 0
+ allocate(ibool_interfaces_outer_core(0,0),stat=ier)
+ endif
- ! Stacey put back
- open(unit=27,file=prname(1:len_trim(prname))//'boundary.bin', &
- status='old',form='unformatted',action='read')
- read(27) nspec2D_xmin_outer_core
- read(27) nspec2D_xmax_outer_core
- read(27) nspec2D_ymin_outer_core
- read(27) nspec2D_ymax_outer_core
- read(27) njunk1
- read(27) njunk2
+ ! debug: saves 1. MPI interface
+ !if( num_interfaces_outer_core >= 1 ) then
+ ! write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_1_points_outer_core_proc',myrank
+ ! call write_VTK_data_points(NGLOB_OUTER_CORE, &
+ ! xstore_outer_core,ystore_outer_core,zstore_outer_core, &
+ ! ibool_interfaces_outer_core(1:nibool_interfaces_outer_core(1),1), &
+ ! nibool_interfaces_outer_core(1),filename)
+ !endif
- read(27) ibelm_xmin_outer_core
- read(27) ibelm_xmax_outer_core
- read(27) ibelm_ymin_outer_core
- read(27) ibelm_ymax_outer_core
- read(27) ibelm_bottom_outer_core
- read(27) ibelm_top_outer_core
+! inner core
+ if( myrank == 0 ) write(IMAIN,*) 'inner core mpi:'
- read(27) normal_xmin_outer_core
- read(27) normal_xmax_outer_core
- read(27) normal_ymin_outer_core
- read(27) normal_ymax_outer_core
- read(27) normal_bottom_outer_core
- read(27) normal_top_outer_core
+ allocate(test_flag(NGLOB_INNER_CORE), &
+ stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating test_flag inner core')
+
+ ! sets flag to rank id (+1 to avoid problems with zero rank)
+ test_flag(:) = 0.0
+ do ispec=1,NSPEC_INNER_CORE
+ ! suppress fictitious elements in central cube
+ if(idoubling_inner_core(ispec) == IFLAG_IN_FICTITIOUS_CUBE) cycle
+ do k = 1,NGLLZ
+ do j = 1,NGLLY
+ do i = 1,NGLLX
+ iglob = ibool_inner_core(i,j,k,ispec)
+ test_flag(iglob) = myrank + 1.0
+ enddo
+ enddo
+ enddo
+ enddo
- read(27) jacobian2D_xmin_outer_core
- read(27) jacobian2D_xmax_outer_core
- read(27) jacobian2D_ymin_outer_core
- read(27) jacobian2D_ymax_outer_core
- read(27) jacobian2D_bottom_outer_core
- read(27) jacobian2D_top_outer_core
- close(27)
+ ! assembles values
+ call assemble_MPI_scalar_block(myrank,test_flag, &
+ NGLOB_INNER_CORE, &
+ iproc_xi,iproc_eta,ichunk,addressing, &
+ iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+ npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+ iboolfaces_inner_core,iboolcorner_inner_core, &
+ iprocfrom_faces,iprocto_faces,imsg_type, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ buffer_send_faces_scalar,buffer_received_faces_scalar,npoin2D_max_all_CM_IC, &
+ buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
+ NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
+ NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL(IREGION_INNER_CORE), &
+ NGLOB2DMAX_XMIN_XMAX(IREGION_INNER_CORE),NGLOB2DMAX_YMIN_YMAX(IREGION_INNER_CORE),NGLOB2DMAX_XY,NCHUNKS_VAL)
+ ! removes own myrank id (+1)
+ test_flag(:) = test_flag(:) - ( myrank + 1.0)
+ where( test_flag(:) < 0.0 ) test_flag(:) = 0.0
- !
+ ! debug: saves array
+ write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_test_flag_inner_core_A_proc',myrank
+ call write_VTK_glob_points(NGLOB_INNER_CORE, &
+ xstore_inner_core,ystore_inner_core,zstore_inner_core, &
+ test_flag,filename)
+
+! ! gets new interfaces for inner_core without central cube yet
+! ! determines neighbor rank for shared faces
+! call rmd_get_MPI_interfaces(myrank,NGLOB_INNER_CORE,NSPEC_INNER_CORE, &
+! test_flag,my_neighbours,nibool_neighbours,ibool_neighbours, &
+! num_interfaces_inner_core,max_nibool_interfaces_inner_core, &
+! max_nibool,MAX_NEIGHBOURS, &
+! ibool_inner_core,&
+! is_on_a_slice_edge_inner_core, &
+! IREGION_INNER_CORE,.false.,idoubling_inner_core)
+
+ ! including central cube
+ if(INCLUDE_CENTRAL_CUBE) then
+ if( myrank == 0 ) write(IMAIN,*) 'inner core with central cube mpi:'
+
+ allocate(test_flag_cc(NGLOB_INNER_CORE), &
+ stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating test_flag_cc inner core')
+
+ ! re-sets flag to rank id (+1 to avoid problems with zero rank)
+ test_flag_cc(:) = 0.0
+ do ispec=1,NSPEC_INNER_CORE
+ ! suppress fictitious elements in central cube
+ if(idoubling_inner_core(ispec) == IFLAG_IN_FICTITIOUS_CUBE) cycle
+ do k = 1,NGLLZ
+ do j = 1,NGLLY
+ do i = 1,NGLLX
+ iglob = ibool_inner_core(i,j,k,ispec)
+ test_flag_cc(iglob) = myrank + 1.0
+ enddo
+ enddo
+ enddo
+ enddo
+
+ ! test_flag is a scalar, not a vector
+ ndim_assemble = 1
+ ! use central cube buffers to assemble the inner core mass matrix with the central cube
+ call assemble_MPI_central_cube_block(ichunk,nb_msgs_theor_in_cube, sender_from_slices_to_cube, &
+ npoin2D_cube_from_slices, buffer_all_cube_from_slices, &
+ buffer_slices, buffer_slices2, ibool_central_cube, &
+ receiver_cube_from_slices, ibool_inner_core, &
+ idoubling_inner_core, NSPEC_INNER_CORE, &
+ ibelm_bottom_inner_core, NSPEC2D_BOTTOM(IREGION_INNER_CORE), &
+ NGLOB_INNER_CORE, &
+ test_flag_cc,ndim_assemble)
+
+
+ ! removes own myrank id (+1)
+ test_flag_cc(:) = test_flag_cc(:) - ( myrank + 1.0)
+ where( test_flag_cc(:) < 0.0 ) test_flag_cc(:) = 0.0
+
+ write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_test_flag_inner_core_B_proc',myrank
+ call write_VTK_glob_points(NGLOB_INNER_CORE, &
+ xstore_inner_core,ystore_inner_core,zstore_inner_core, &
+ test_flag_cc,filename)
+
+! ! adds additional inner core points
+! call rmd_get_MPI_interfaces(myrank,NGLOB_INNER_CORE,NSPEC_INNER_CORE, &
+! test_flag,my_neighbours,nibool_neighbours,ibool_neighbours, &
+! num_interfaces_inner_core,max_nibool_interfaces_inner_core, &
+! max_nibool,MAX_NEIGHBOURS, &
+! ibool_inner_core,&
+! is_on_a_slice_edge_inner_core, &
+! IREGION_INNER_CORE,.true.,idoubling_inner_core)
+
+ ! adds both together
+ test_flag(:) = test_flag(:) + test_flag_cc(:)
+
+ deallocate(test_flag_cc)
+
+ ! debug: saves array
+ write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_test_flag_inner_core_C_proc',myrank
+ call write_VTK_glob_points(NGLOB_INNER_CORE, &
+ xstore_inner_core,ystore_inner_core,zstore_inner_core, &
+ test_flag,filename)
+
+ endif
+
+ ! gets MPI interfaces for the inner core (note: test_flag already includes central cube contributions here when INCLUDE_CENTRAL_CUBE is set)
+ ! determines neighbor rank for shared faces
+ call rmd_get_MPI_interfaces(myrank,NGLOB_INNER_CORE,NSPEC_INNER_CORE, &
+ test_flag,my_neighbours,nibool_neighbours,ibool_neighbours, &
+ num_interfaces_inner_core,max_nibool_interfaces_inner_core, &
+ max_nibool,MAX_NEIGHBOURS, &
+ ibool_inner_core,&
+ is_on_a_slice_edge_inner_core, &
+ IREGION_INNER_CORE,.false.,idoubling_inner_core)
+
+ deallocate(test_flag)
+
+ ! stores MPI interface information
+ allocate(my_neighbours_inner_core(num_interfaces_inner_core), &
+ nibool_interfaces_inner_core(num_interfaces_inner_core), &
+ stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating array my_neighbours_inner_core etc.')
+
+ ! copies interfaces arrays
+ if( num_interfaces_inner_core > 0 ) then
+ allocate(ibool_interfaces_inner_core(max_nibool_interfaces_inner_core,num_interfaces_inner_core), &
+ stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating array ibool_interfaces_inner_core')
+
+ ! ranks of neighbour processes
+ my_neighbours_inner_core(:) = my_neighbours(1:num_interfaces_inner_core)
+ ! number of global ibool entries on each interface
+ nibool_interfaces_inner_core(:) = nibool_neighbours(1:num_interfaces_inner_core)
+ ! global iglob point ids on each interface
+ ibool_interfaces_inner_core(:,:) = ibool_neighbours(1:max_nibool_interfaces_inner_core,1:num_interfaces_inner_core)
+ else
+ ! dummy allocation (fortran90 should allow allocate statement with zero array size)
+ max_nibool_interfaces_inner_core = 0
+ allocate(ibool_interfaces_inner_core(0,0),stat=ier)
+ endif
+
+ ! debug: saves 1. MPI interface
+ if( myrank == 0 .and. num_interfaces_inner_core >= 1 ) then
+ write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_1_points_inner_core_proc',myrank
+ call write_VTK_data_points(NGLOB_INNER_CORE, &
+ xstore_inner_core,ystore_inner_core,zstore_inner_core, &
+ ibool_interfaces_inner_core(1:nibool_interfaces_inner_core(1),1), &
+ nibool_interfaces_inner_core(1),filename)
+ !print*,'saved: ',trim(filename)//'.vtk'
+ endif
+
+ ! synchronizes MPI processes
+ call sync_all()
+
+ ! frees temporary array
+ deallocate(ibool_neighbours)
+
+
+ ! allocates MPI buffers
+ ! crust mantle
+ allocate(buffer_send_vector_crust_mantle(NDIM,max_nibool_interfaces_crust_mantle,num_interfaces_crust_mantle), &
+ buffer_recv_vector_crust_mantle(NDIM,max_nibool_interfaces_crust_mantle,num_interfaces_crust_mantle), &
+ request_send_vector_crust_mantle(num_interfaces_crust_mantle), &
+ request_recv_vector_crust_mantle(num_interfaces_crust_mantle), &
+ stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating array buffer_send_vector_crust_mantle etc.')
+ if( SIMULATION_TYPE == 3 ) then
+ allocate(b_buffer_send_vector_crust_mantle(NDIM,max_nibool_interfaces_crust_mantle,num_interfaces_crust_mantle), &
+ b_buffer_recv_vector_crust_mantle(NDIM,max_nibool_interfaces_crust_mantle,num_interfaces_crust_mantle), &
+ b_request_send_vector_crust_mantle(num_interfaces_crust_mantle), &
+ b_request_recv_vector_crust_mantle(num_interfaces_crust_mantle), &
+ stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating array b_buffer_send_vector_crust_mantle etc.')
+ endif
+
+ ! outer core
+ allocate(buffer_send_scalar_outer_core(max_nibool_interfaces_outer_core,num_interfaces_outer_core), &
+ buffer_recv_scalar_outer_core(max_nibool_interfaces_outer_core,num_interfaces_outer_core), &
+ request_send_scalar_outer_core(num_interfaces_outer_core), &
+ request_recv_scalar_outer_core(num_interfaces_outer_core), &
+ stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating array buffer_send_vector_outer_core etc.')
+ if( SIMULATION_TYPE == 3 ) then
+ allocate(b_buffer_send_scalar_outer_core(max_nibool_interfaces_outer_core,num_interfaces_outer_core), &
+ b_buffer_recv_scalar_outer_core(max_nibool_interfaces_outer_core,num_interfaces_outer_core), &
+ b_request_send_scalar_outer_core(num_interfaces_outer_core), &
+ b_request_recv_scalar_outer_core(num_interfaces_outer_core), &
+ stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating array b_buffer_send_vector_outer_core etc.')
+ endif
+
! inner core
- !
+ allocate(buffer_send_vector_inner_core(NDIM,max_nibool_interfaces_inner_core,num_interfaces_inner_core), &
+ buffer_recv_vector_inner_core(NDIM,max_nibool_interfaces_inner_core,num_interfaces_inner_core), &
+ request_send_vector_inner_core(num_interfaces_inner_core), &
+ request_recv_vector_inner_core(num_interfaces_inner_core), &
+ stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating array buffer_send_vector_inner_core etc.')
+ if( SIMULATION_TYPE == 3 ) then
+ allocate(b_buffer_send_vector_inner_core(NDIM,max_nibool_interfaces_inner_core,num_interfaces_inner_core), &
+ b_buffer_recv_vector_inner_core(NDIM,max_nibool_interfaces_inner_core,num_interfaces_inner_core), &
+ b_request_send_vector_inner_core(num_interfaces_inner_core), &
+ b_request_recv_vector_inner_core(num_interfaces_inner_core), &
+ stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating array b_buffer_send_vector_inner_core etc.')
+ endif
+
+ end subroutine read_mesh_databases_MPIinter
- ! create name of database
- call create_name_database(prname,myrank,IREGION_INNER_CORE,LOCAL_PATH)
- ! read info for vertical edges for central cube matching in inner core
- open(unit=27,file=prname(1:len_trim(prname))//'boundary.bin', &
- status='old',form='unformatted',action='read')
- read(27) nspec2D_xmin_inner_core
- read(27) nspec2D_xmax_inner_core
- read(27) nspec2D_ymin_inner_core
- read(27) nspec2D_ymax_inner_core
- read(27) njunk1
- read(27) njunk2
+!
+!-------------------------------------------------------------------------------------------------
+!
- ! boundary parameters
- read(27) ibelm_xmin_inner_core
- read(27) ibelm_xmax_inner_core
- read(27) ibelm_ymin_inner_core
- read(27) ibelm_ymax_inner_core
- read(27) ibelm_bottom_inner_core
- read(27) ibelm_top_inner_core
- close(27)
+ subroutine rmd_get_MPI_interfaces(myrank,NGLOB,NSPEC, &
+ test_flag,my_neighbours,nibool_neighbours,ibool_neighbours, &
+ num_interfaces,max_nibool_interfaces, &
+ max_nibool,MAX_NEIGHBOURS, &
+ ibool,&
+ is_on_a_slice_edge, &
+ IREGION,add_central_cube,idoubling)
+ use constants
+
+ implicit none
- ! -- Boundary Mesh for crust and mantle ---
- if (SAVE_BOUNDARY_MESH .and. SIMULATION_TYPE == 3) then
+ include 'mpif.h'
- call create_name_database(prname,myrank,IREGION_CRUST_MANTLE,LOCAL_PATH)
+ integer,intent(in) :: myrank,NGLOB,NSPEC
- open(unit=27,file=prname(1:len_trim(prname))//'boundary_disc.bin', &
- status='old',form='unformatted',action='read')
- read(27) njunk1,njunk2,njunk3
- if (njunk1 /= NSPEC2D_MOHO .and. njunk2 /= NSPEC2D_400 .and. njunk3 /= NSPEC2D_670) &
- call exit_mpi(myrank, 'Error reading ibelm_disc.bin file')
- read(27) ibelm_moho_top
- read(27) ibelm_moho_bot
- read(27) ibelm_400_top
- read(27) ibelm_400_bot
- read(27) ibelm_670_top
- read(27) ibelm_670_bot
- read(27) normal_moho
- read(27) normal_400
- read(27) normal_670
- close(27)
+ real(kind=CUSTOM_REAL),dimension(NGLOB),intent(in) :: test_flag
- k_top = 1
- k_bot = NGLLZ
+ integer,intent(in) :: max_nibool
+ integer,intent(in) :: MAX_NEIGHBOURS
+ integer, dimension(MAX_NEIGHBOURS),intent(inout) :: my_neighbours,nibool_neighbours
+ integer, dimension(max_nibool,MAX_NEIGHBOURS),intent(inout) :: ibool_neighbours
- ! initialization
- moho_kl = 0.; d400_kl = 0.; d670_kl = 0.; cmb_kl = 0.; icb_kl = 0.
+ integer,intent(out) :: num_interfaces,max_nibool_interfaces
+
+ integer,dimension(NGLLX,NGLLY,NGLLZ,NSPEC),intent(in) :: ibool
+
+ logical,dimension(NSPEC),intent(inout) :: is_on_a_slice_edge
+
+ integer,intent(in) :: IREGION
+ logical,intent(in) :: add_central_cube
+ integer,dimension(NSPEC),optional:: idoubling
+
+ ! local parameters
+ integer :: ispec,iglob,j,k
+ integer :: iface,iedge,icorner
+ integer :: ii,iinterface,icurrent,rank
+ integer :: npoin
+ logical :: is_done,ispec_is_outer
+ integer,dimension(NGLOB) :: work_test_flag
+ logical,dimension(NSPEC) :: work_ispec_is_outer
+
+ ! initializes
+ if( add_central_cube) then
+ ! adds points to existing inner_core interfaces
+ iinterface = num_interfaces
+ work_ispec_is_outer(:) = is_on_a_slice_edge(:)
+ else
+ ! creates new interfaces
+ iinterface = 0
+ num_interfaces = 0
+ max_nibool_interfaces = 0
+ my_neighbours(:) = -1
+ nibool_neighbours(:) = 0
+ ibool_neighbours(:,:) = 0
+ work_ispec_is_outer(:) = .false.
endif
+
+ ! makes working copy (converted to nearest integers)
+ work_test_flag(:) = nint( test_flag(:) )
- end subroutine read_mesh_databases_coupling
+ ! loops over all elements
+ do ispec = 1,NSPEC
+ ! exclude elements in inner part of slice
+ !if( .not. is_on_a_slice_edge(ispec) ) cycle
+ ! exclude elements in fictitious core
+ if( IREGION == IREGION_INNER_CORE) then
+ if( idoubling(ispec) == IFLAG_IN_FICTITIOUS_CUBE ) cycle
+ endif
+
+ ! sets flag if element has global points shared with other processes
+ ispec_is_outer = .false.
+
+ ! 1. finds neighbours which share a whole face with this process
+ ! (faces are shared only with 1 other neighbour process)
+
+ ! loops over all faces of element
+ do iface = 1, 6
+
+ ! chooses a point inside face
+ select case( iface )
+ case( 1 )
+ ! face I == 1
+ iglob = ibool(1,2,2,ispec)
+ case( 2 )
+ ! face I == NGLLX
+ iglob = ibool(NGLLX,2,2,ispec)
+ case( 3 )
+ ! face J == 1
+ iglob = ibool(2,1,2,ispec)
+ case( 4 )
+ ! face J == NGLLY
+ iglob = ibool(2,NGLLY,2,ispec)
+ case( 5 )
+ ! face K == 1
+ iglob = ibool(2,2,1,ispec)
+ case( 6 )
+ ! face K == NGLLZ
+ iglob = ibool(2,2,NGLLZ,ispec)
+ end select
+
+ ! checks assembled flag on global point
+ if( work_test_flag(iglob) > 0 ) then
+ ispec_is_outer = .true.
+
+ ! rank of neighbor process
+ rank = work_test_flag(iglob) - 1
+
+ ! checks ranks range
+ if( rank < 0 .or. rank >= NPROCTOT_VAL ) then
+ print*,'error face rank: ',myrank,'ispec=',ispec
+ print*,' neighbor rank = ',rank,'exceeds total nproc:',NPROCTOT_VAL
+ print*,' face ',iface
+ call exit_mpi(myrank,'error face neighbor mpi rank')
+ endif
+
+ ! checks if already stored
+ icurrent = 0
+ is_done = .false.
+ do ii = 1,iinterface
+ if( rank == my_neighbours(ii) ) then
+ icurrent = ii
+ is_done = .true.
+ exit
+ endif
+ enddo
+
+ ! updates interfaces array
+ if( .not. is_done ) then
+ iinterface = iinterface + 1
+ if( iinterface > MAX_NEIGHBOURS ) &
+ call exit_mpi(myrank,'interface face exceeds MAX_NEIGHBOURS range')
+ ! adds as neighbor new interface
+ my_neighbours(iinterface) = rank
+ icurrent = iinterface
+ endif
+ if( icurrent == 0 ) &
+ call exit_mpi(myrank,'could not find current interface for this neighbor, please check my_neighbours')
+
+ ! adds interface points and removes neighbor flag from face
+ ! assumes NGLLX == NGLLY == NGLLZ
+ do k=1,NGLLX
+ do j=1,NGLLX
+ select case( iface )
+ case( 1 )
+ ! face I == 1
+ iglob = ibool(1,j,k,ispec)
+ case( 2 )
+ ! face I == NGLLX
+ iglob = ibool(NGLLX,j,k,ispec)
+ case( 3 )
+ ! face J == 1
+ iglob = ibool(j,1,k,ispec)
+ case( 4 )
+ ! face J == NGLLY
+ iglob = ibool(j,NGLLY,k,ispec)
+ case( 5 )
+ ! face K == 1
+ iglob = ibool(j,k,1,ispec)
+ case( 6 )
+ ! face K == NGLLZ
+ iglob = ibool(j,k,NGLLZ,ispec)
+ end select
+
+ ! checks that we take each global point (on edges and corners) only once
+ if( work_test_flag(iglob) <= 0 ) cycle ! continues to next point
+
+ ! increases number of total points on this interface
+ nibool_neighbours(icurrent) = nibool_neighbours(icurrent) + 1
+ if( nibool_neighbours(icurrent) > max_nibool) &
+ call exit_mpi(myrank,'interface face exceeds max_nibool range')
+
+ ! stores interface iglob index
+ ibool_neighbours( nibool_neighbours(icurrent),icurrent ) = iglob
+
+ ! re-sets flag
+ work_test_flag(iglob) = work_test_flag(iglob) - ( rank + 1 )
+ ! debug
+ if( work_test_flag(iglob) < 0 ) then
+ print*,'error face flag:',myrank,'ispec=',ispec,'rank=',rank
+ print*,' flag=',work_test_flag(iglob),'iface jk=',iface,j,k
+ call exit_mpi(myrank,'error face flag')
+ endif
+ enddo
+ enddo
+ endif
+ enddo ! iface
+
+ ! 2. finds neighbours which share a single edge with this process
+ ! note: by now, faces have subtracted their neighbours, edges can hold only one more process info
+
+ ! loops over all edges of element
+ do iedge = 1, 12
+
+ ! chooses a point inside edge but not corner
+ select case( iedge )
+ case( 1 )
+ ! face I == 1, J == 1
+ iglob = ibool(1,1,2,ispec)
+ case( 2 )
+ ! face I == 1, J == NGLLY
+ iglob = ibool(1,NGLLY,2,ispec)
+ case( 3 )
+ ! face I == 1, K == 1
+ iglob = ibool(1,2,1,ispec)
+ case( 4 )
+ ! face I == 1, K == NGLLZ
+ iglob = ibool(1,2,NGLLZ,ispec)
+ case( 5 )
+ ! face I == NGLLX, J == 1
+ iglob = ibool(NGLLX,1,2,ispec)
+ case( 6 )
+ ! face I == NGLLX, J == NGLLY
+ iglob = ibool(NGLLX,NGLLY,2,ispec)
+ case( 7 )
+ ! face I == NGLLX, K == 1
+ iglob = ibool(NGLLX,2,1,ispec)
+ case( 8 )
+ ! face I == NGLLX, K == NGLLZ
+ iglob = ibool(NGLLX,2,NGLLZ,ispec)
+ case( 9 )
+ ! face J == 1, K == 1
+ iglob = ibool(2,1,1,ispec)
+ case( 10 )
+ ! face J == 1, K == NGLLZ
+ iglob = ibool(2,1,NGLLZ,ispec)
+ case( 11 )
+ ! face J == NGLLY, K == 1
+ iglob = ibool(2,NGLLY,1,ispec)
+ case( 12 )
+ ! face J == NGLLY, K == NGLLZ
+ iglob = ibool(2,NGLLY,NGLLZ,ispec)
+ end select
+
+ ! checks assembled flag on global point
+ if( work_test_flag(iglob) > 0 ) then
+ ispec_is_outer = .true.
+
+ ! rank of neighbor process
+ rank = work_test_flag(iglob) - 1
+
+ ! checks ranks range
+ if( rank < 0 .or. rank >= NPROCTOT_VAL ) then
+ print*,'error egde rank: ',myrank
+ print*,' neighbor rank = ',rank,'exceeds total nproc:',NPROCTOT_VAL
+ print*,' edge ',iedge
+ call exit_mpi(myrank,'error edge neighbor mpi rank')
+ endif
+
+ ! checks if already stored
+ icurrent = 0
+ is_done = .false.
+ do ii = 1,iinterface
+ if( rank == my_neighbours(ii) ) then
+ icurrent = ii
+ is_done = .true.
+ exit
+ endif
+ enddo
+
+ ! updates interfaces array
+ if( .not. is_done ) then
+ iinterface = iinterface + 1
+ if( iinterface > MAX_NEIGHBOURS ) &
+ call exit_mpi(myrank,'interface edge exceeds MAX_NEIGHBOURS range')
+ ! adds as neighbor new interface
+ my_neighbours(iinterface) = rank
+ icurrent = iinterface
+ endif
+ if( icurrent == 0 ) &
+ call exit_mpi(myrank,'could not find current interface for this neighbor, please check my_neighbours')
+
+ ! adds interface points and removes neighbor flag from edge
+ ! assumes NGLLX == NGLLY == NGLLZ
+ do k = 1,NGLLX
+ select case( iedge )
+ case( 1 )
+ ! face I == 1, J == 1
+ iglob = ibool(1,1,k,ispec)
+ case( 2 )
+ ! face I == 1, J == NGLLY
+ iglob = ibool(1,NGLLY,k,ispec)
+ case( 3 )
+ ! face I == 1, K == 1
+ iglob = ibool(1,k,1,ispec)
+ case( 4 )
+ ! face I == 1, K == NGLLZ
+ iglob = ibool(1,k,NGLLZ,ispec)
+ case( 5 )
+ ! face I == NGLLX, J == 1
+ iglob = ibool(NGLLX,1,k,ispec)
+ case( 6 )
+ ! face I == NGLLX, J == NGLLY
+ iglob = ibool(NGLLX,NGLLY,k,ispec)
+ case( 7 )
+ ! face I == NGLLX, K == 1
+ iglob = ibool(NGLLX,k,1,ispec)
+ case( 8 )
+ ! face I == NGLLX, K == NGLLZ
+ iglob = ibool(NGLLX,k,NGLLZ,ispec)
+ case( 9 )
+ ! face J == 1, K == 1
+ iglob = ibool(k,1,1,ispec)
+ case( 10 )
+ ! face J == 1, K == NGLLZ
+ iglob = ibool(k,1,NGLLZ,ispec)
+ case( 11 )
+ ! face J == NGLLY, K == 1
+ iglob = ibool(k,NGLLY,1,ispec)
+ case( 12 )
+ ! face J == NGLLY, K == NGLLZ
+ iglob = ibool(k,NGLLY,NGLLZ,ispec)
+ end select
+
+ ! checks that we take each global point (on edges and corners) only once
+ if( work_test_flag(iglob) <= 0 ) cycle ! continues to next point
+
+ ! increases number of total points on this interface
+ nibool_neighbours(icurrent) = nibool_neighbours(icurrent) + 1
+ if( nibool_neighbours(icurrent) > max_nibool) &
+ call exit_mpi(myrank,'interface edge exceeds max_nibool range')
+
+ ! stores interface iglob index
+ ibool_neighbours( nibool_neighbours(icurrent),icurrent ) = iglob
+
+ ! re-sets flag
+ work_test_flag(iglob) = work_test_flag(iglob) - ( rank + 1 )
+
+ ! debug
+ if( work_test_flag(iglob) < 0 ) call exit_mpi(myrank,'error edge flag')
+
+ enddo
+ endif
+ enddo ! iedge
+
+
+ ! 3. finds neighbours which share a single corner with this process
+ ! note: faces and edges have subtracted their neighbors, only one more process left possible
+
+ ! loops over all corners of element
+ do icorner = 1, 8
+
+ ! chooses a corner point
+ select case( icorner )
+ case( 1 )
+ ! face I == 1
+ iglob = ibool(1,1,1,ispec)
+ case( 2 )
+ ! face I == 1
+ iglob = ibool(1,NGLLY,1,ispec)
+ case( 3 )
+ ! face I == 1
+ iglob = ibool(1,1,NGLLZ,ispec)
+ case( 4 )
+ ! face I == 1
+ iglob = ibool(1,NGLLY,NGLLZ,ispec)
+ case( 5 )
+ ! face I == NGLLX
+ iglob = ibool(NGLLX,1,1,ispec)
+ case( 6 )
+ ! face I == NGLLX
+ iglob = ibool(NGLLX,NGLLY,1,ispec)
+ case( 7 )
+ ! face I == NGLLX
+ iglob = ibool(NGLLX,1,NGLLZ,ispec)
+ case( 8 )
+ ! face I == NGLLX
+ iglob = ibool(NGLLX,NGLLY,NGLLZ,ispec)
+ end select
+
+ ! makes sure that all elements on mpi interfaces are included
+ ! uses original test_flag array, since the working copy reduces values
+ ! note: there can be elements which have an edge or corner shared with
+ ! other mpi partitions, but have the work_test_flag value already set to zero
+ ! since the iglob point was found before.
+ ! also, this check here would suffice to determine the outer flag, but we also include the
+ ! check everywhere we encounter it too
+ if( test_flag(iglob) > 0.5 ) then
+ ispec_is_outer = .true.
+ endif
+
+ ! checks assembled flag on global point
+ if( work_test_flag(iglob) > 0 ) then
+ ispec_is_outer = .true.
+
+ ! rank of neighbor process
+ rank = work_test_flag(iglob) - 1
+
+ ! checks ranks range
+ if( rank < 0 .or. rank >= NPROCTOT_VAL ) then
+ print*,'error corner: ',myrank
+ print*,' neighbor rank = ',rank,'exceeds total nproc:',NPROCTOT_VAL
+ print*,' corner ',icorner
+ call exit_mpi(myrank,'error corner neighbor mpi rank')
+ endif
+
+ ! checks if already stored
+ icurrent = 0
+ is_done = .false.
+ do ii = 1,iinterface
+ if( rank == my_neighbours(ii) ) then
+ icurrent = ii
+ is_done = .true.
+ exit
+ endif
+ enddo
+
+ ! updates interfaces array
+ if( .not. is_done ) then
+ iinterface = iinterface + 1
+ if( iinterface > MAX_NEIGHBOURS ) &
+ call exit_mpi(myrank,'interface corner exceed MAX_NEIGHBOURS range')
+ ! adds as neighbor new interface
+ my_neighbours(iinterface) = rank
+ icurrent = iinterface
+ endif
+ if( icurrent == 0 ) &
+ call exit_mpi(myrank,'could not find current interface for this neighbor, please check my_neighbours')
+
+ ! adds this corner as interface point and removes neighbor flag from face
+ ! increases number of total points on this interface
+ nibool_neighbours(icurrent) = nibool_neighbours(icurrent) + 1
+ if( nibool_neighbours(icurrent) > max_nibool) &
+ call exit_mpi(myrank,'interface corner exceeds max_nibool range')
+
+ ! stores interface iglob index
+ ibool_neighbours( nibool_neighbours(icurrent),icurrent ) = iglob
+
+ ! re-sets flag
+ work_test_flag(iglob) = work_test_flag(iglob) - ( rank + 1 )
+
+ ! debug
+ if( work_test_flag(iglob) < 0 ) call exit_mpi(myrank,'error corner flag')
+
+ endif
+
+ enddo ! icorner
+
+ ! stores flags for outer elements when recognized as such
+ ! (inner/outer elements separated for non-blocking mpi communications)
+ if( ispec_is_outer ) then
+ work_ispec_is_outer(ispec) = .true.
+ endif
+
+ enddo
+
+ ! number of outer elements (on MPI interfaces)
+ npoin = count( work_ispec_is_outer )
+
+ ! debug: user output
+ if( myrank == 0 ) then
+ write(IMAIN,*) ' interfaces : ',iinterface
+ write(IMAIN,*) ' my_neighbours: ',my_neighbours(1:iinterface)
+ write(IMAIN,*) ' nibool_neighbours: ',nibool_neighbours(1:iinterface)
+ write(IMAIN,*) ' test flag min/max: ',minval(work_test_flag),maxval(work_test_flag)
+ write(IMAIN,*) ' outer elements: ',npoin
+ write(IMAIN,*)
+ endif
+ call sync_all()
+
+ ! checks if all points were recognized
+ if( maxval(work_test_flag) > 0 ) then
+ print*,'error mpi interface rank: ',myrank
+ print*,' work_test_flag min/max :',minval(work_test_flag),maxval(work_test_flag)
+ call exit_mpi(myrank,'error: mpi points remain unrecognized, please check mesh interfaces')
+ endif
+
+ ! checks if all points were taken only once
+ if( minval(work_test_flag) < 0 ) then
+ print*,'error mpi interface rank: ',myrank
+ print*,' work_test_flag min/max :',minval(work_test_flag),maxval(work_test_flag)
+ call exit_mpi(myrank,'error: mpi points counted more than once, please check mesh interfaces')
+ endif
+
+ ! sets interfaces infos
+ num_interfaces = iinterface
+ max_nibool_interfaces = maxval( nibool_neighbours(1:num_interfaces) )
+
+ ! optional: ibool usually is already sorted,
+ ! this makes sure ibool_neighbours arrays are still sorted
+ ! (iglob indices in increasing order; we will access acceleration fields accel(:,iglob),
+ ! thus it helps if iglob strides are short and accesses are close-by)
+ do iinterface = 1,num_interfaces
+ npoin = nibool_neighbours(iinterface)
+ call heap_sort( npoin, ibool_neighbours(1:npoin,iinterface) )
+
+ ! debug: checks if unique set of iglob values
+ do j=1,npoin-1
+ if( ibool_neighbours(j,iinterface) == ibool_neighbours(j+1,iinterface) ) then
+ print*,'error mpi interface rank:',myrank
+ print*,' interface: ',my_neighbours(iinterface),'point: ',j,'of',npoin
+ call exit_mpi(myrank,'error: mpi points not unique on interface')
+ endif
+ enddo
+ enddo
+
+
+ ! re-sets flags for outer elements
+ is_on_a_slice_edge(:) = work_ispec_is_outer(:)
+
+ end subroutine rmd_get_MPI_interfaces
+
!
!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
!
+ subroutine read_mesh_databases_InnerOuter()
- subroutine read_mesh_databases_stacey(myrank, &
- nimin_crust_mantle,nimax_crust_mantle,njmin_crust_mantle, &
- njmax_crust_mantle,nkmin_xi_crust_mantle,nkmin_eta_crust_mantle, &
- nspec2D_xmin_crust_mantle,nspec2D_xmax_crust_mantle, &
- nspec2D_ymin_crust_mantle,nspec2D_ymax_crust_mantle, &
- reclen_xmin_crust_mantle,reclen_xmax_crust_mantle, &
- reclen_ymin_crust_mantle,reclen_ymax_crust_mantle, &
- nimin_outer_core,nimax_outer_core,njmin_outer_core, &
- njmax_outer_core,nkmin_xi_outer_core,nkmin_eta_outer_core, &
- nspec2D_xmin_outer_core,nspec2D_xmax_outer_core, &
- nspec2D_ymin_outer_core,nspec2D_ymax_outer_core, &
- reclen_xmin_outer_core,reclen_xmax_outer_core, &
- reclen_ymin_outer_core,reclen_ymax_outer_core, &
- reclen_zmin,NSPEC2D_BOTTOM, &
- SIMULATION_TYPE,SAVE_FORWARD,LOCAL_PATH,NSTEP)
+! sets up inner/outer elements for non-blocking MPI communication
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
implicit none
+
+ ! local parameters
+ real :: percentage_edge
+ integer :: ier,ispec,iinner,iouter
+ character(len=150) :: filename
- include "constants.h"
- include "OUTPUT_FILES/values_from_mesher.h"
+ ! stores inner / outer elements
+ !
+ ! note: arrays is_on_a_slice_edge_.. have flags set for elements which need to
+ ! communicate with other MPI processes
+
+ ! crust_mantle
+ nspec_outer_crust_mantle = count( is_on_a_slice_edge_crust_mantle )
+ nspec_inner_crust_mantle = NSPEC_CRUST_MANTLE - nspec_outer_crust_mantle
+
+ num_phase_ispec_crust_mantle = max(nspec_inner_crust_mantle,nspec_outer_crust_mantle)
+
+ allocate(phase_ispec_inner_crust_mantle(num_phase_ispec_crust_mantle,2),stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating array phase_ispec_inner_crust_mantle')
+
+ phase_ispec_inner_crust_mantle(:,:) = 0
+ iinner = 0
+ iouter = 0
+ do ispec=1,NSPEC_CRUST_MANTLE
+ if( is_on_a_slice_edge_crust_mantle(ispec) ) then
+ ! outer element
+ iouter = iouter + 1
+ phase_ispec_inner_crust_mantle(iouter,1) = ispec
+ else
+ ! inner element
+ iinner = iinner + 1
+ phase_ispec_inner_crust_mantle(iinner,2) = ispec
+ endif
+ enddo
- integer myrank
+ ! outer_core
+ nspec_outer_outer_core = count( is_on_a_slice_edge_outer_core )
+ nspec_inner_outer_core = NSPEC_OUTER_CORE - nspec_outer_outer_core
- integer, dimension(2,NSPEC2DMAX_YMIN_YMAX_CM) :: &
- nimin_crust_mantle,nimax_crust_mantle,nkmin_eta_crust_mantle
- integer, dimension(2,NSPEC2DMAX_XMIN_XMAX_CM) :: &
- njmin_crust_mantle,njmax_crust_mantle,nkmin_xi_crust_mantle
- integer nspec2D_xmin_crust_mantle,nspec2D_xmax_crust_mantle, &
- nspec2D_ymin_crust_mantle,nspec2D_ymax_crust_mantle
- integer reclen_xmin_crust_mantle, reclen_xmax_crust_mantle, reclen_ymin_crust_mantle, &
- reclen_ymax_crust_mantle
+ num_phase_ispec_outer_core = max(nspec_inner_outer_core,nspec_outer_outer_core)
+
+ allocate(phase_ispec_inner_outer_core(num_phase_ispec_outer_core,2),stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating array phase_ispec_inner_outer_core')
+
+ phase_ispec_inner_outer_core(:,:) = 0
+ iinner = 0
+ iouter = 0
+ do ispec=1,NSPEC_OUTER_CORE
+ if( is_on_a_slice_edge_outer_core(ispec) ) then
+ ! outer element
+ iouter = iouter + 1
+ phase_ispec_inner_outer_core(iouter,1) = ispec
+ else
+ ! inner element
+ iinner = iinner + 1
+ phase_ispec_inner_outer_core(iinner,2) = ispec
+ endif
+ enddo
- integer, dimension(2,NSPEC2DMAX_YMIN_YMAX_OC) :: nimin_outer_core,nimax_outer_core,nkmin_eta_outer_core
- integer, dimension(2,NSPEC2DMAX_XMIN_XMAX_OC) :: njmin_outer_core,njmax_outer_core,nkmin_xi_outer_core
- integer nspec2D_xmin_outer_core,nspec2D_xmax_outer_core, &
- nspec2D_ymin_outer_core,nspec2D_ymax_outer_core
- integer reclen_xmin_outer_core, reclen_xmax_outer_core,reclen_ymin_outer_core, &
- reclen_ymax_outer_core
+ ! inner_core
+ nspec_outer_inner_core = count( is_on_a_slice_edge_inner_core )
+ nspec_inner_inner_core = NSPEC_INNER_CORE - nspec_outer_inner_core
- integer reclen_zmin
- integer, dimension(MAX_NUM_REGIONS) :: NSPEC2D_BOTTOM
+ num_phase_ispec_inner_core = max(nspec_inner_inner_core,nspec_outer_inner_core)
+
+ allocate(phase_ispec_inner_inner_core(num_phase_ispec_inner_core,2),stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating array phase_ispec_inner_inner_core')
+
+ phase_ispec_inner_inner_core(:,:) = 0
+ iinner = 0
+ iouter = 0
+ do ispec=1,NSPEC_INNER_CORE
+ if( is_on_a_slice_edge_inner_core(ispec) ) then
+ ! outer element
+ iouter = iouter + 1
+ phase_ispec_inner_inner_core(iouter,1) = ispec
+ else
+ ! inner element
+ iinner = iinner + 1
+ phase_ispec_inner_inner_core(iinner,2) = ispec
+ endif
+ enddo
+
+ ! user output
+ if(myrank == 0) then
- integer SIMULATION_TYPE
- logical SAVE_FORWARD
- character(len=150) LOCAL_PATH
- integer NSTEP
+ write(IMAIN,*) 'for overlapping of communications with calculations:'
+ write(IMAIN,*)
+ percentage_edge = 100.0 - 100. * nspec_outer_crust_mantle / real(NSPEC_CRUST_MANTLE)
+ write(IMAIN,*) 'percentage of edge elements in crust/mantle ',percentage_edge,'%'
+ write(IMAIN,*) 'percentage of volume elements in crust/mantle ',100. - percentage_edge,'%'
+ write(IMAIN,*)
+
+ percentage_edge = 100.0 - 100.* nspec_outer_outer_core / real(NSPEC_OUTER_CORE)
+ write(IMAIN,*) 'percentage of edge elements in outer core ',percentage_edge,'%'
+ write(IMAIN,*) 'percentage of volume elements in outer core ',100. - percentage_edge,'%'
+ write(IMAIN,*)
+
+ percentage_edge = 100.0 - 100. * nspec_outer_inner_core / real(NSPEC_INNER_CORE)
+ write(IMAIN,*) 'percentage of edge elements in inner core ',percentage_edge,'%'
+ write(IMAIN,*) 'percentage of volume elements in inner core ',100. - percentage_edge,'%'
+ write(IMAIN,*)
+
+ endif
+
+ ! debug: saves element flags
+ ! crust mantle
+ !write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_innerouter_crust_mantle_proc',myrank
+ !call write_VTK_data_elem_l(NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE, &
+ ! xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
+ ! ibool_crust_mantle, &
+ ! is_on_a_slice_edge_crust_mantle,filename)
+ ! outer core
+ !write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_innerouter_outer_core_proc',myrank
+ !call write_VTK_data_elem_l(NSPEC_OUTER_CORE,NGLOB_OUTER_CORE, &
+ ! xstore_outer_core,ystore_outer_core,zstore_outer_core, &
+ ! ibool_outer_core, &
+ ! is_on_a_slice_edge_outer_core,filename)
+ ! inner core
+ write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_innerouter_inner_core_proc',myrank
+ call write_VTK_data_elem_l(NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
+ xstore_inner_core,ystore_inner_core,zstore_inner_core, &
+ ibool_inner_core, &
+ is_on_a_slice_edge_inner_core,filename)
+
+ end subroutine read_mesh_databases_InnerOuter
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine read_mesh_databases_stacey()
+
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
+
+ implicit none
+
! local parameters
integer(kind=8) :: filesize
- character(len=150) prname
+ integer :: ier
+ ! sets up absorbing boundary buffer arrays
+ ! crust_mantle
+ if (nspec2D_xmin_crust_mantle > 0 .and. (SIMULATION_TYPE == 3 &
+ .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
+ nabs_xmin_cm = nspec2D_xmin_crust_mantle
+ else
+ nabs_xmin_cm = 1
+ endif
+ !daniel: not sure why dimensions are (..,..,..,..,8) ?
+ !allocate(absorb_xmin_crust_mantle5(NDIM,NGLLY,NGLLZ,nabs_xmin_cm,8),stat=ier)
+ allocate(absorb_xmin_crust_mantle(NDIM,NGLLY,NGLLZ,nabs_xmin_cm),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating absorb xmin')
+
+ if (nspec2D_xmax_crust_mantle > 0 .and. (SIMULATION_TYPE == 3 &
+ .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
+ nabs_xmax_cm = nspec2D_xmax_crust_mantle
+ else
+ nabs_xmax_cm = 1
+ endif
+ !daniel: not sure why dimensions are (..,..,..,..,8)
+ !allocate(absorb_xmax_crust_mantle5(NDIM,NGLLY,NGLLZ,nabs_xmax_cm,8),stat=ier)
+ allocate(absorb_xmax_crust_mantle(NDIM,NGLLY,NGLLZ,nabs_xmax_cm),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating absorb xmax')
+
+ if (nspec2D_ymin_crust_mantle > 0 .and. (SIMULATION_TYPE == 3 &
+ .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
+ nabs_ymin_cm = nspec2D_ymin_crust_mantle
+ else
+ nabs_ymin_cm = 1
+ endif
+ !daniel: not sure why dimensions are (..,..,..,..,8)
+ !allocate(absorb_ymin_crust_mantle5(NDIM,NGLLX,NGLLZ,nabs_ymin_cm,8),stat=ier)
+ allocate(absorb_ymin_crust_mantle(NDIM,NGLLX,NGLLZ,nabs_ymin_cm),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating absorb ymin')
+
+ if (nspec2D_ymax_crust_mantle > 0 .and. (SIMULATION_TYPE == 3 &
+ .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
+ nabs_ymax_cm = nspec2D_ymax_crust_mantle
+ else
+ nabs_ymax_cm = 1
+ endif
+ !daniel: not sure why dimensions are (..,..,..,..,8)
+ !allocate(absorb_ymax_crust_mantle5(NDIM,NGLLX,NGLLZ,nabs_ymax_cm,8),stat=ier)
+ allocate(absorb_ymax_crust_mantle(NDIM,NGLLX,NGLLZ,nabs_ymax_cm),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating absorb ymax')
+
+ ! outer_core
+ if (nspec2D_xmin_outer_core > 0 .and. (SIMULATION_TYPE == 3 &
+ .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
+ nabs_xmin_oc = nspec2D_xmin_outer_core
+ else
+ nabs_xmin_oc = 1
+ endif
+ allocate(absorb_xmin_outer_core(NGLLY,NGLLZ,nabs_xmin_oc),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating absorb xmin')
+
+ if (nspec2D_xmax_outer_core > 0 .and. (SIMULATION_TYPE == 3 &
+ .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
+ nabs_xmax_oc = nspec2D_xmax_outer_core
+ else
+ nabs_xmax_oc = 1
+ endif
+ allocate(absorb_xmax_outer_core(NGLLY,NGLLZ,nabs_xmax_oc),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating absorb xmax')
+
+ if (nspec2D_ymin_outer_core > 0 .and. (SIMULATION_TYPE == 3 &
+ .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
+ nabs_ymin_oc = nspec2D_ymin_outer_core
+ else
+ nabs_ymin_oc = 1
+ endif
+ allocate(absorb_ymin_outer_core(NGLLX,NGLLZ,nabs_ymin_oc),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating absorb ymin')
+
+ if (nspec2D_ymax_outer_core > 0 .and. (SIMULATION_TYPE == 3 &
+ .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
+ nabs_ymax_oc = nspec2D_ymax_outer_core
+ else
+ nabs_ymax_oc = 1
+ endif
+ allocate(absorb_ymax_outer_core(NGLLX,NGLLZ,nabs_ymax_oc),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating absorb ymax')
+
+ if (NSPEC2D_BOTTOM(IREGION_OUTER_CORE) > 0 .and. &
+ (SIMULATION_TYPE == 3 .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
+ nabs_zmin_oc = NSPEC2D_BOTTOM(IREGION_OUTER_CORE)
+ else
+ nabs_zmin_oc = 1
+ endif
+ allocate(absorb_zmin_outer_core(NGLLX,NGLLY,nabs_zmin_oc),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating absorb zmin')
+
+
! crust and mantle
+
! create name of database
call create_name_database(prname,myrank,IREGION_CRUST_MANTLE,LOCAL_PATH)
@@ -944,6 +2089,7 @@
! outer core
+
! create name of database
call create_name_database(prname,myrank,IREGION_OUTER_CORE,LOCAL_PATH)
@@ -1097,3 +2243,274 @@
endif
end subroutine read_mesh_databases_stacey
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!-------------------------------------------------------------------------------------------------
+!-------------------------------------------------------------------------------------------------
+!
+
+!daniel: TODO - place this auxiliary function...
+
+ subroutine heap_sort( N, array )
+
+! in-place heap sort of an integer array into ascending order
+! (e.g. 1 - 5 - 6 - 9 - 12 - 13 - 14 - ...)
+
+ implicit none
+ integer,intent(in) :: N
+ integer,dimension(N),intent(inout) :: array
+
+ ! local parameters
+ integer :: swap_val
+ integer :: idx
+
+ ! arrays of length 0 or 1 are already sorted
+ if( N < 2 ) return
+
+ ! heapify: establishes the max-heap property, sifting down
+ ! from the last parent node up to the root
+ do idx = N/2, 1, -1
+ call heap_sort_siftdown(N,array,idx,N)
+ enddo
+
+ ! repeatedly moves the current maximum (heap root) to the end of the
+ ! unsorted region, then restores the heap on the shortened prefix
+ do idx = N, 2, -1
+ swap_val = array(1)
+ array(1) = array(idx)
+ array(idx) = swap_val
+ call heap_sort_siftdown(N,array,1,idx-1)
+ enddo
+
+ end subroutine heap_sort
+
+!
+!----
+!
+
+ subroutine heap_sort_siftdown(N,array,start,bottom)
+
+! restores the max-heap property for the sub-heap rooted at 'start',
+! considering only the entries array(start:bottom); helper for heap_sort
+
+ implicit none
+
+ integer,intent(in):: N
+ integer,dimension(N),intent(inout) :: array
+ ! intent(in) added: both indices are only read, never modified
+ integer,intent(in) :: start,bottom
+
+ ! local parameters
+ integer :: i,j
+ integer :: tmp
+
+ i = start
+ tmp = array(i)
+ j = 2*i
+ do while( j <= bottom )
+ ! chooses the larger of the two children in this section
+ if( j < bottom ) then
+ if( array(j) <= array(j+1) ) j = j + 1
+ endif
+
+ ! stops once both children are smaller than the initial value
+ if( array(j) < tmp ) exit
+
+ ! promotes the larger child and descends one level
+ array(i) = array(j)
+ i = j
+ j = 2*i
+ enddo
+
+ ! final resting position of the sifted value
+ array(i) = tmp
+
+ end subroutine heap_sort_siftdown
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+!daniel: TODO - place this auxiliary function...
+
+! external mesh routine for saving vtk files for points locations
+
+ subroutine write_VTK_data_points(nglob, &
+ xstore_dummy,ystore_dummy,zstore_dummy, &
+ points_globalindices,num_points_globalindices, &
+ prname_file)
+
+! writes a legacy-ASCII VTK file holding the coordinates of a selected
+! subset of global mesh points (for visualization, e.g. with Paraview);
+! stops with an error if any supplied global index is out of range
+
+ implicit none
+
+ include "constants.h"
+
+ ! total number of global points (upper bound for valid indices)
+ integer :: nglob
+
+! global coordinates
+ real(kind=CUSTOM_REAL), dimension(nglob) :: xstore_dummy,ystore_dummy,zstore_dummy
+
+! global indices of the points to output (values must lie in [1,nglob])
+ integer :: num_points_globalindices
+ integer, dimension(num_points_globalindices) :: points_globalindices
+
+! file name (the '.vtk' suffix is appended here)
+ character(len=150) prname_file
+
+ integer :: i,iglob
+
+! write source and receiver VTK files for Paraview
+ !debug
+ !write(IMAIN,*) ' vtk file: '
+ !write(IMAIN,*) ' ',prname_file(1:len_trim(prname_file))//'.vtk'
+
+ ! legacy VTK header: UNSTRUCTURED_GRID dataset containing points only (no cells)
+ open(IOVTK,file=prname_file(1:len_trim(prname_file))//'.vtk',status='unknown')
+ write(IOVTK,'(a)') '# vtk DataFile Version 3.1'
+ write(IOVTK,'(a)') 'material model VTK file'
+ write(IOVTK,'(a)') 'ASCII'
+ write(IOVTK,'(a)') 'DATASET UNSTRUCTURED_GRID'
+ write(IOVTK, '(a,i12,a)') 'POINTS ', num_points_globalindices, ' float'
+ do i=1,num_points_globalindices
+ iglob = points_globalindices(i)
+ ! sanity check: an index outside [1,nglob] indicates corrupted input
+ if( iglob <= 0 .or. iglob > nglob ) then
+ print*,'error: '//prname_file(1:len_trim(prname_file))//'.vtk'
+ print*,'error global index: ',iglob,i
+ close(IOVTK)
+ stop 'error vtk points file'
+ endif
+
+ write(IOVTK,'(3e18.6)') xstore_dummy(iglob),ystore_dummy(iglob),zstore_dummy(iglob)
+ enddo
+ write(IOVTK,*) ""
+
+ close(IOVTK)
+
+ end subroutine write_VTK_data_points
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+! external mesh routine for saving vtk files for points locations
+
+ subroutine write_VTK_glob_points(nglob, &
+ xstore_dummy,ystore_dummy,zstore_dummy, &
+ glob_values, &
+ prname_file)
+
+! writes a legacy-ASCII VTK file holding one scalar data value per global
+! mesh point (for visualization, e.g. with Paraview)
+
+ implicit none
+
+ include "constants.h"
+
+ ! total number of global points
+ integer :: nglob
+
+ ! global coordinates
+ real(kind=CUSTOM_REAL), dimension(nglob) :: xstore_dummy,ystore_dummy,zstore_dummy
+
+ ! scalar data value per global point
+ real(kind=CUSTOM_REAL), dimension(nglob) :: glob_values
+
+ ! file name (the '.vtk' suffix is appended here)
+ character(len=150) prname_file
+
+ integer :: iglob
+
+ ! write source and receiver VTK files for Paraview
+ !debug
+ !write(IMAIN,*) ' vtk file: '
+ !write(IMAIN,*) ' ',prname_file(1:len_trim(prname_file))//'.vtk'
+
+ ! legacy VTK header: UNSTRUCTURED_GRID dataset containing points only (no cells)
+ open(IOVTK,file=prname_file(1:len_trim(prname_file))//'.vtk',status='unknown')
+ write(IOVTK,'(a)') '# vtk DataFile Version 3.1'
+ write(IOVTK,'(a)') 'material model VTK file'
+ write(IOVTK,'(a)') 'ASCII'
+ write(IOVTK,'(a)') 'DATASET UNSTRUCTURED_GRID'
+ write(IOVTK, '(a,i12,a)') 'POINTS ', nglob, ' float'
+ do iglob=1,nglob
+ ! uses the same fixed format as the other write_VTK_* routines
+ ! (was list-directed, which produced inconsistent precision/layout)
+ write(IOVTK,'(3e18.6)') xstore_dummy(iglob),ystore_dummy(iglob),zstore_dummy(iglob)
+ enddo
+ write(IOVTK,*) ""
+
+ ! writes out gll-data (velocity) for each element point
+ write(IOVTK,'(a,i12)') "POINT_DATA ",nglob
+ write(IOVTK,'(a)') "SCALARS glob_data float"
+ write(IOVTK,'(a)') "LOOKUP_TABLE default"
+ do iglob=1,nglob
+ write(IOVTK,*) glob_values(iglob)
+ enddo
+ write(IOVTK,*) ""
+
+ close(IOVTK)
+
+ end subroutine write_VTK_glob_points
+
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+! routine for saving vtk file holding logical flag on each spectral element
+
+ subroutine write_VTK_data_elem_l(nspec,nglob, &
+ xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
+ elem_flag,prname_file)
+
+! writes a legacy-ASCII VTK file holding one integer flag (0/1) per spectral
+! element, derived from a logical array, for visualization with Paraview
+
+ implicit none
+
+ include "constants.h"
+
+ integer :: nspec,nglob
+
+! global coordinates and local-to-global index mapping
+ integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+ real(kind=CUSTOM_REAL), dimension(nglob) :: xstore_dummy,ystore_dummy,zstore_dummy
+
+! element flag array (one logical per spectral element)
+ logical, dimension(nspec) :: elem_flag
+ integer :: ispec,i
+
+! file name (the '.vtk' suffix is appended here)
+ character(len=150) prname_file
+
+! write source and receiver VTK files for Paraview
+ !debug
+ !write(IMAIN,*) ' vtk file: '
+ !write(IMAIN,*) ' ',prname_file(1:len_trim(prname_file))//'.vtk'
+
+ ! legacy VTK header and point coordinates
+ open(IOVTK,file=prname_file(1:len_trim(prname_file))//'.vtk',status='unknown')
+ write(IOVTK,'(a)') '# vtk DataFile Version 3.1'
+ write(IOVTK,'(a)') 'material model VTK file'
+ write(IOVTK,'(a)') 'ASCII'
+ write(IOVTK,'(a)') 'DATASET UNSTRUCTURED_GRID'
+ write(IOVTK, '(a,i12,a)') 'POINTS ', nglob, ' float'
+ do i=1,nglob
+ write(IOVTK,'(3e18.6)') xstore_dummy(i),ystore_dummy(i),zstore_dummy(i)
+ enddo
+ write(IOVTK,*) ""
+
+ ! each element is output as an 8-node hexahedron using its corner nodes
+ ! note: indices for vtk start at 0
+ write(IOVTK,'(a,i12,i12)') "CELLS ",nspec,nspec*9
+ do ispec=1,nspec
+ write(IOVTK,'(9i12)') 8,ibool(1,1,1,ispec)-1,ibool(NGLLX,1,1,ispec)-1,ibool(NGLLX,NGLLY,1,ispec)-1,ibool(1,NGLLY,1,ispec)-1,&
+ ibool(1,1,NGLLZ,ispec)-1,ibool(NGLLX,1,NGLLZ,ispec)-1,ibool(NGLLX,NGLLY,NGLLZ,ispec)-1,ibool(1,NGLLY,NGLLZ,ispec)-1
+ enddo
+ write(IOVTK,*) ""
+
+ ! type: hexahedrons (VTK cell type 12 == VTK_HEXAHEDRON)
+ write(IOVTK,'(a,i12)') "CELL_TYPES ",nspec
+ write(IOVTK,*) (12,ispec=1,nspec)
+ write(IOVTK,*) ""
+
+ write(IOVTK,'(a,i12)') "CELL_DATA ",nspec
+ write(IOVTK,'(a)') "SCALARS elem_flag integer"
+ write(IOVTK,'(a)') "LOOKUP_TABLE default"
+ do ispec = 1,nspec
+ ! writes the logical flag as 0/1; tests the logical directly
+ ! instead of the redundant comparison '.eqv. .true.'
+ if( elem_flag(ispec) ) then
+ write(IOVTK,*) 1
+ else
+ write(IOVTK,*) 0
+ endif
+ enddo
+ write(IOVTK,*) ""
+ close(IOVTK)
+
+ end subroutine write_VTK_data_elem_l
+
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_topography_bathymetry.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_topography_bathymetry.f90 (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_topography_bathymetry.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -0,0 +1,66 @@
+!=====================================================================
+!
+! S p e c f e m 3 D G l o b e V e r s i o n 5 . 1
+! --------------------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA
+! and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+ subroutine read_topography_bathymetry()
+
+! sets up ellipticity splines and reads/broadcasts the topography and
+! bathymetry grid used for surface and ocean-load corrections
+
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
+ implicit none
+
+ include 'mpif.h'
+
+ ! local parameters
+ integer :: ier
+
+ ! get MPI starting time
+ time_start = MPI_WTIME()
+
+ ! make ellipticity
+ if(ELLIPTICITY_VAL) then
+ call make_ellipticity(nspl,rspl,espl,espl2,ONE_CRUST)
+ endif
+
+ ! topography / bathymetry is only needed when either flag is set;
+ ! the condition is identical on all ranks, so the collective broadcast
+ ! below stays consistent across the communicator
+ if( TOPOGRAPHY .or. OCEANS_VAL ) then
+
+ ! master reads topography and bathymetry file
+ if(myrank == 0) then
+ call read_topo_bathy_file(ibathy_topo)
+ endif
+
+ ! broadcast the information read on the master to the nodes
+ ! (previously done unconditionally, which transmitted an unread,
+ ! undefined array when topography/oceans were disabled)
+ call MPI_BCAST(ibathy_topo,NX_BATHY*NY_BATHY,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
+
+ ! user output
+ if( myrank == 0 ) then
+ ! elapsed time since beginning of mesh generation
+ tCPU = MPI_WTIME() - time_start
+ write(IMAIN,*)
+ write(IMAIN,*) 'Elapsed time for reading topo/bathy in seconds = ',sngl(tCPU)
+ write(IMAIN,*)
+ endif
+
+ endif
+
+ end subroutine read_topography_bathymetry
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/save_forward_arrays.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/save_forward_arrays.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/save_forward_arrays.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -25,56 +25,23 @@
!
!=====================================================================
- subroutine save_forward_arrays(myrank,SIMULATION_TYPE,SAVE_FORWARD, &
- NUMBER_OF_RUNS,NUMBER_OF_THIS_RUN, &
- displ_crust_mantle,veloc_crust_mantle,accel_crust_mantle, &
- displ_inner_core,veloc_inner_core,accel_inner_core, &
- displ_outer_core,veloc_outer_core,accel_outer_core, &
- R_memory_crust_mantle,R_memory_inner_core, &
- epsilondev_crust_mantle,epsilondev_inner_core, &
- A_array_rotation,B_array_rotation, &
- LOCAL_PATH)
+ subroutine save_forward_arrays()
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
+
implicit none
- include "constants.h"
- include "OUTPUT_FILES/values_from_mesher.h"
-
- integer myrank
-
- integer SIMULATION_TYPE
- logical SAVE_FORWARD
- integer NUMBER_OF_RUNS,NUMBER_OF_THIS_RUN
-
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE) :: &
- displ_crust_mantle,veloc_crust_mantle,accel_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE) :: &
- displ_inner_core,veloc_inner_core,accel_inner_core
- real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE) :: &
- displ_outer_core,veloc_outer_core,accel_outer_core
-
- real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUAT) :: &
- R_memory_crust_mantle
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_OR_ATT) :: &
- epsilondev_crust_mantle
- real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ATTENUATION) :: &
- R_memory_inner_core
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_OR_ATT) :: &
- epsilondev_inner_core
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ROTATION) :: &
- A_array_rotation,B_array_rotation
-
- character(len=150) LOCAL_PATH
-
! local parameters
character(len=150) outputname
-
! save files to local disk or tape system if restart file
if(NUMBER_OF_RUNS > 1 .and. NUMBER_OF_THIS_RUN < NUMBER_OF_RUNS) then
write(outputname,"('dump_all_arrays',i6.6)") myrank
open(unit=55,file=trim(LOCAL_PATH)//'/'//outputname,status='unknown',form='unformatted',action='write')
+
write(55) displ_crust_mantle
write(55) veloc_crust_mantle
write(55) accel_crust_mantle
@@ -84,12 +51,34 @@
write(55) displ_outer_core
write(55) veloc_outer_core
write(55) accel_outer_core
- write(55) epsilondev_crust_mantle
- write(55) epsilondev_inner_core
+
+ write(55) epsilondev_xx_crust_mantle
+ write(55) epsilondev_yy_crust_mantle
+ write(55) epsilondev_xy_crust_mantle
+ write(55) epsilondev_xz_crust_mantle
+ write(55) epsilondev_yz_crust_mantle
+
+ write(55) epsilondev_xx_inner_core
+ write(55) epsilondev_yy_inner_core
+ write(55) epsilondev_xy_inner_core
+ write(55) epsilondev_xz_inner_core
+ write(55) epsilondev_yz_inner_core
+
write(55) A_array_rotation
write(55) B_array_rotation
- write(55) R_memory_crust_mantle
- write(55) R_memory_inner_core
+
+ write(55) R_xx_crust_mantle
+ write(55) R_yy_crust_mantle
+ write(55) R_xy_crust_mantle
+ write(55) R_xz_crust_mantle
+ write(55) R_yz_crust_mantle
+
+ write(55) R_xx_inner_core
+ write(55) R_yy_inner_core
+ write(55) R_xy_inner_core
+ write(55) R_xz_inner_core
+ write(55) R_yz_inner_core
+
close(55)
endif
@@ -97,6 +86,7 @@
if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
write(outputname,'(a,i6.6,a)') 'proc',myrank,'_save_forward_arrays.bin'
open(unit=55,file=trim(LOCAL_PATH)//'/'//outputname,status='unknown',form='unformatted',action='write')
+
write(55) displ_crust_mantle
write(55) veloc_crust_mantle
write(55) accel_crust_mantle
@@ -106,15 +96,36 @@
write(55) displ_outer_core
write(55) veloc_outer_core
write(55) accel_outer_core
- write(55) epsilondev_crust_mantle
- write(55) epsilondev_inner_core
+
+ write(55) epsilondev_xx_crust_mantle
+ write(55) epsilondev_yy_crust_mantle
+ write(55) epsilondev_xy_crust_mantle
+ write(55) epsilondev_xz_crust_mantle
+ write(55) epsilondev_yz_crust_mantle
+
+ write(55) epsilondev_xx_inner_core
+ write(55) epsilondev_yy_inner_core
+ write(55) epsilondev_xy_inner_core
+ write(55) epsilondev_xz_inner_core
+ write(55) epsilondev_yz_inner_core
+
if (ROTATION_VAL) then
write(55) A_array_rotation
write(55) B_array_rotation
endif
if (ATTENUATION_VAL) then
- write(55) R_memory_crust_mantle
- write(55) R_memory_inner_core
+ write(55) R_xx_crust_mantle
+ write(55) R_yy_crust_mantle
+ write(55) R_xy_crust_mantle
+ write(55) R_xz_crust_mantle
+ write(55) R_yz_crust_mantle
+
+ write(55) R_xx_inner_core
+ write(55) R_yy_inner_core
+ write(55) R_xy_inner_core
+ write(55) R_xz_inner_core
+ write(55) R_yz_inner_core
+
endif
close(55)
endif
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/setup_GLL_points.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/setup_GLL_points.f90 (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/setup_GLL_points.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -0,0 +1,57 @@
+!=====================================================================
+!
+! S p e c f e m 3 D G l o b e V e r s i o n 5 . 1
+! --------------------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA
+! and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+ subroutine setup_GLL_points()
+
+! prepares the GLL points, integration weights and derivation matrices,
+! plus the transposed matrices needed by the Deville et al. (2002) routines
+
+ use specfem_par
+ implicit none
+
+ ! local parameters
+ integer :: ip,jp
+
+ ! computes GLL points, weights and derivation matrices once and for all
+ call define_derivation_matrices(xigll,yigll,zigll,wxgll,wygll,wzgll, &
+ hprime_xx,hprime_yy,hprime_zz, &
+ hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube)
+
+ ! nothing more to do unless the optimized Deville products are enabled
+ if( .not. USE_DEVILLE_PRODUCTS_VAL ) return
+
+ ! the optimized routines from Deville et al. (2002) are hard-wired
+ ! for exactly 5 GLL points in each direction
+ if(NGLLX /= 5 .or. NGLLY /= 5 .or. NGLLZ /= 5) &
+ stop 'Deville et al. (2002) routines can only be used if NGLLX = NGLLY = NGLLZ = 5'
+
+ ! stores the transposes of the derivation matrices
+ do jp = 1,NGLLY
+ do ip = 1,NGLLX
+ hprime_xxT(jp,ip) = hprime_xx(ip,jp)
+ hprimewgll_xxT(jp,ip) = hprimewgll_xx(ip,jp)
+ enddo
+ enddo
+
+ end subroutine setup_GLL_points
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -25,94 +25,79 @@
!
!=====================================================================
- subroutine setup_sources_receivers(NSOURCES,myrank,ibool_crust_mantle, &
- xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
- xigll,yigll,zigll,TOPOGRAPHY, &
- sec,tshift_cmt,theta_source,phi_source, &
- NSTEP,DT,hdur,hdur_gaussian,t0,Mxx,Myy,Mzz,Mxy,Mxz,Myz, &
- islice_selected_source,ispec_selected_source, &
- xi_source,eta_source,gamma_source,nu_source, &
- rspl,espl,espl2,nspl,ibathy_topo,NEX_XI,PRINT_SOURCE_TIME_FUNCTION, &
- rec_filename,nrec,islice_selected_rec,ispec_selected_rec, &
- xi_receiver,eta_receiver,gamma_receiver,station_name,network_name, &
- stlat,stlon,stele,stbur,nu, &
- nrec_local,nadj_rec_local,nrec_simulation, &
- SIMULATION_TYPE,RECEIVERS_CAN_BE_BURIED,MOVIE_SURFACE,MOVIE_VOLUME, &
- HDUR_MOVIE,OUTPUT_FILES,LOCAL_PATH)
+ subroutine setup_sources_receivers()
-
+ use specfem_par
implicit none
- include 'mpif.h'
- include "constants.h"
- include "OUTPUT_FILES/values_from_mesher.h"
+ ! locates sources and determines simulation start time t0
+ call setup_sources()
- integer NSOURCES,myrank
+ ! reads in stations file and locates receivers
+ call setup_receivers()
+
+ ! write source and receiver VTK files for Paraview
+ call setup_sources_receivers_VTKfile()
+
+ ! pre-compute source arrays
+ call setup_sources_precompute_arrays()
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: ibool_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE) :: &
- xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle
+ ! pre-compute receiver interpolation factors
+ call setup_receivers_precompute_intp()
- double precision, dimension(NGLLX) :: xigll
- double precision, dimension(NGLLY) :: yigll
- double precision, dimension(NGLLZ) :: zigll
+ ! user output
+ if(myrank == 0) then
+ write(IMAIN,*)
+ write(IMAIN,*) 'Total number of samples for seismograms = ',NSTEP
+ write(IMAIN,*)
+ if(NSOURCES > 1) write(IMAIN,*) 'Using ',NSOURCES,' point sources'
+ endif
- logical TOPOGRAPHY
+ end subroutine setup_sources_receivers
- double precision sec,DT,t0,min_tshift_cmt_original
+!
+!-------------------------------------------------------------------------------------------------
+!
- double precision, dimension(NSOURCES) :: tshift_cmt,hdur,hdur_gaussian
- double precision, dimension(NSOURCES) :: theta_source,phi_source
- double precision, dimension(NSOURCES) :: Mxx,Myy,Mzz,Mxy,Mxz,Myz
- double precision, dimension(NSOURCES) :: xi_source,eta_source,gamma_source,nu_source
+ subroutine setup_sources()
- integer, dimension(NSOURCES) :: islice_selected_source,ispec_selected_source
- integer NSTEP
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_movie
+ implicit none
- ! for ellipticity
- integer nspl
- double precision rspl(NR),espl(NR),espl2(NR)
-
- integer, dimension(NX_BATHY,NY_BATHY) :: ibathy_topo
-
- integer NEX_XI
- logical PRINT_SOURCE_TIME_FUNCTION
-
- character(len=150) rec_filename
-
- integer nrec
- integer, dimension(nrec) :: islice_selected_rec,ispec_selected_rec
-
- double precision, dimension(nrec) :: xi_receiver,eta_receiver,gamma_receiver
- character(len=MAX_LENGTH_STATION_NAME), dimension(nrec) :: station_name
- character(len=MAX_LENGTH_NETWORK_NAME), dimension(nrec) :: network_name
-
- double precision, dimension(nrec) :: stlat,stlon,stele,stbur
- double precision, dimension(NDIM,NDIM,nrec) :: nu
-
- integer nrec_local,nadj_rec_local,nrec_simulation
-
- integer SIMULATION_TYPE
-
- logical RECEIVERS_CAN_BE_BURIED,MOVIE_SURFACE,MOVIE_VOLUME
-
- double precision HDUR_MOVIE
-
- character(len=150) OUTPUT_FILES
- character(len=150) LOCAL_PATH
-
! local parameters
- double precision :: junk
+ double precision :: min_tshift_cmt_original
+ double precision :: sec
integer :: yr,jda,ho,mi
- integer :: irec,isource,nrec_tot_found,ier
- integer :: icomp,itime,nadj_files_found,nadj_files_found_tot
- character(len=3),dimension(NDIM) :: comp
- character(len=256) :: filename,adj_source_file,system_command,filename_new
- character(len=2) :: bic
+ integer :: isource
+ character(len=256) :: filename
+ integer :: ier
+
! makes smaller hdur for movies
logical,parameter :: USE_SMALLER_HDUR_MOVIE = .false.
-! sources
+ ! allocate arrays for source
+ allocate(islice_selected_source(NSOURCES), &
+ ispec_selected_source(NSOURCES), &
+ Mxx(NSOURCES), &
+ Myy(NSOURCES), &
+ Mzz(NSOURCES), &
+ Mxy(NSOURCES), &
+ Mxz(NSOURCES), &
+ Myz(NSOURCES), &
+ xi_source(NSOURCES), &
+ eta_source(NSOURCES), &
+ gamma_source(NSOURCES), &
+ tshift_cmt(NSOURCES), &
+ hdur(NSOURCES), &
+ hdur_gaussian(NSOURCES), &
+ theta_source(NSOURCES), &
+ phi_source(NSOURCES), &
+ nu_source(NDIM,NDIM,NSOURCES),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating source arrays')
+
+ ! sources
! BS BS moved open statement and writing of first lines into sr.vtk before the
! call to locate_sources, where further write statements to that file follow
if(myrank == 0) then
@@ -140,6 +125,15 @@
if(abs(minval(tshift_cmt)) > TINYVAL) call exit_MPI(myrank,'one tshift_cmt must be zero, others must be positive')
+ ! count number of sources located in this slice
+ nsources_local = 0
+ if (SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) then
+ do isource = 1,NSOURCES
+ if(myrank == islice_selected_source(isource)) nsources_local = nsources_local + 1
+ enddo
+ endif
+
+
! filter source time function by Gaussian with hdur = HDUR_MOVIE when outputing movies or shakemaps
if (MOVIE_SURFACE .or. MOVIE_VOLUME ) then
! smaller hdur_movie will do
@@ -223,6 +217,57 @@
call exit_mpi(myrank,'error negative USER_T0 parameter in constants.h')
endif
+ ! get information about event name and location for SAC seismograms
+
+ ! The following line is added for get_event_info subroutine.
+ ! Because the way NSOURCES_SAC was declared has been changed.
+ ! The rest of the changes in this program is just the updates of the subroutines that
+ ! I did changes, e.g., adding/removing parameters. by Ebru Bozdag
+ call get_event_info_parallel(myrank,yr_SAC,jda_SAC,ho_SAC,mi_SAC,sec_SAC,&
+ event_name_SAC,t_cmt_SAC,t_shift_SAC, &
+ elat_SAC,elon_SAC,depth_SAC,mb_SAC,cmt_lat_SAC,&
+ cmt_lon_SAC,cmt_depth_SAC,cmt_hdur_SAC,NSOURCES)
+
+ end subroutine setup_sources
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine setup_receivers()
+
+ use specfem_par
+ use specfem_par_crustmantle
+ implicit none
+
+ include 'mpif.h'
+
+ ! local parameters
+ double precision :: sec
+ double precision :: junk
+ integer :: yr,jda,ho,mi
+ integer :: irec,isource,nrec_tot_found
+ integer :: icomp,itime,nadj_files_found,nadj_files_found_tot
+ character(len=3),dimension(NDIM) :: comp
+ character(len=256) :: filename,adj_source_file
+ character(len=2) :: bic
+ integer :: ier
+
+ ! allocate memory for receiver arrays
+ allocate(islice_selected_rec(nrec), &
+ ispec_selected_rec(nrec), &
+ xi_receiver(nrec), &
+ eta_receiver(nrec), &
+ gamma_receiver(nrec), &
+ station_name(nrec), &
+ network_name(nrec), &
+ stlat(nrec), &
+ stlon(nrec), &
+ stele(nrec), &
+ stbur(nrec), &
+ nu(NDIM,NDIM,nrec),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating receiver arrays')
+
! receivers
if(myrank == 0) then
write(IMAIN,*)
@@ -334,7 +379,22 @@
write(IMAIN,*) 'this total is okay'
endif
endif
+
+ end subroutine setup_receivers
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine setup_sources_receivers_VTKfile()
+
+ use specfem_par
+ implicit none
+
+ ! local parameters
+ character(len=256) :: filename,system_command,filename_new
+
! user output
if(myrank == 0) then
@@ -365,22 +425,73 @@
"'",NSOURCES,"'",trim(filename),trim(filename_new),trim(filename)
call system(system_command)
+ endif
- write(IMAIN,*)
- write(IMAIN,*) 'Total number of samples for seismograms = ',NSTEP
- write(IMAIN,*)
+ end subroutine setup_sources_receivers_VTKfile
+!
+!-------------------------------------------------------------------------------------------------
+!
- if(NSOURCES > 1) write(IMAIN,*) 'Using ',NSOURCES,' point sources'
+ subroutine setup_sources_precompute_arrays()
+
+ use specfem_par
+ use specfem_par_crustmantle
+ implicit none
+
+ ! local parameters
+ integer :: ier
+
+ ! allocates source arrays
+ if (SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) then
+ allocate(sourcearrays(NDIM,NGLLX,NGLLY,NGLLZ,NSOURCES),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating sourcearrays')
+
+ ! stores source arrays
+ call setup_sources_receivers_srcarr(NSOURCES,myrank, &
+ ispec_selected_source,islice_selected_source, &
+ xi_source,eta_source,gamma_source, &
+ Mxx,Myy,Mzz,Mxy,Mxz,Myz, &
+ xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+ etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
+ gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
+ xigll,yigll,zigll,sourcearrays)
endif
- end subroutine setup_sources_receivers
+ ! adjoint source arrays
+ if (SIMULATION_TYPE == 2 .or. SIMULATION_TYPE == 3) then
+ NSTEP_SUB_ADJ = ceiling( dble(NSTEP)/dble(NTSTEP_BETWEEN_READ_ADJSRC) )
+ allocate(iadj_vec(NSTEP),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating iadj_vec')
+ ! initializes iadj_vec
+ do it=1,NSTEP
+ iadj_vec(it) = NSTEP-it+1 ! default is for reversing entire record
+ enddo
+
+ if(nadj_rec_local > 0) then
+ ! allocate adjoint source arrays
+ allocate(adj_sourcearrays(NDIM,NGLLX,NGLLY,NGLLZ,nadj_rec_local,NTSTEP_BETWEEN_READ_ADJSRC),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating adjoint sourcearrays')
+ adj_sourcearrays(:,:,:,:,:,:) = 0._CUSTOM_REAL
+
+ ! allocate indexing arrays
+ allocate(iadjsrc(NSTEP_SUB_ADJ,2), &
+ iadjsrc_len(NSTEP_SUB_ADJ),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating adjoint indexing arrays')
+ ! initializes iadjsrc, iadjsrc_len and iadj_vec
+ call setup_sources_receivers_adjindx(NSTEP,NSTEP_SUB_ADJ, &
+ NTSTEP_BETWEEN_READ_ADJSRC, &
+ iadjsrc,iadjsrc_len,iadj_vec)
+ endif
+ endif
+
+ end subroutine setup_sources_precompute_arrays
+
!
!-------------------------------------------------------------------------------------------------
!
-
subroutine setup_sources_receivers_srcarr(NSOURCES,myrank, &
ispec_selected_source,islice_selected_source, &
xi_source,eta_source,gamma_source, &
@@ -517,11 +628,89 @@
end subroutine setup_sources_receivers_adjindx
+
!
!-------------------------------------------------------------------------------------------------
!
+ subroutine setup_receivers_precompute_intp()
+ use specfem_par
+ implicit none
+
+ ! local parameters
+ integer :: ier
+
+ ! define local to global receiver numbering mapping
+ ! needs to be allocate for subroutine calls (even if nrec_local == 0)
+ allocate(number_receiver_global(nrec_local),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating global receiver numbering')
+
+ ! allocates receiver interpolators
+ if (nrec_local > 0) then
+ ! allocate Lagrange interpolators for receivers
+ allocate(hxir_store(nrec_local,NGLLX), &
+ hetar_store(nrec_local,NGLLY), &
+ hgammar_store(nrec_local,NGLLZ),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating receiver interpolators')
+
+ ! define and store Lagrange interpolators at all the receivers
+ if (SIMULATION_TYPE == 2) then
+ nadj_hprec_local = nrec_local
+ else
+ nadj_hprec_local = 1
+ endif
+ allocate(hpxir_store(nadj_hprec_local,NGLLX), &
+ hpetar_store(nadj_hprec_local,NGLLY), &
+ hpgammar_store(nadj_hprec_local,NGLLZ),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating derivative interpolators')
+
+ ! stores interpolators for receiver positions
+ call setup_sources_receivers_intp(NSOURCES,myrank, &
+ islice_selected_source, &
+ xi_source,eta_source,gamma_source, &
+ xigll,yigll,zigll, &
+ SIMULATION_TYPE,nrec,nrec_local, &
+ islice_selected_rec,number_receiver_global, &
+ xi_receiver,eta_receiver,gamma_receiver, &
+ hxir_store,hetar_store,hgammar_store, &
+ nadj_hprec_local,hpxir_store,hpetar_store,hpgammar_store)
+
+ ! allocate seismogram array
+ if (SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) then
+ allocate(seismograms(NDIM,nrec_local,NTSTEP_BETWEEN_OUTPUT_SEISMOS),stat=ier)
+ if(ier /= 0) stop 'error while allocating seismograms'
+ else
+ allocate(seismograms(NDIM*NDIM,nrec_local,NTSTEP_BETWEEN_OUTPUT_SEISMOS),stat=ier)
+ if(ier /= 0) stop 'error while allocating seismograms'
+ ! allocate Frechet derivatives array
+ allocate(moment_der(NDIM,NDIM,nrec_local),sloc_der(NDIM,nrec_local), &
+ stshift_der(nrec_local),shdur_der(nrec_local),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating frechet derivatives arrays')
+
+ moment_der(:,:,:) = 0._CUSTOM_REAL
+ sloc_der(:,:) = 0._CUSTOM_REAL
+ stshift_der(:) = 0._CUSTOM_REAL
+ shdur_der(:) = 0._CUSTOM_REAL
+
+ endif
+ ! initialize seismograms
+ seismograms(:,:,:) = 0._CUSTOM_REAL
+ nit_written = 0
+ else
+ ! allocate dummy array since we need it to pass as argument e.g. in write_seismograms() routine
+ ! note: nrec_local is zero, fortran 90/95 should allow zero-sized array allocation...
+ allocate(seismograms(NDIM,nrec_local,NTSTEP_BETWEEN_OUTPUT_SEISMOS),stat=ier)
+ if( ier /= 0) stop 'error while allocating zero seismograms'
+ endif
+
+ end subroutine setup_receivers_precompute_intp
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
subroutine setup_sources_receivers_intp(NSOURCES,myrank, &
islice_selected_source, &
xi_source,eta_source,gamma_source, &
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D.F90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D.F90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -34,28 +34,27 @@
!
!#define _HANDOPT
-! BEWARE:
-! BEWARE: we have observed that using _HANDOPT in combination with -O3 or higher can lead to problems on some machines;
-! BEWARE: thus, be careful when using it. At the very least, run the same test simulation once with _HANDOPT and once without
-! BEWARE: and make sure that all the seismograms you get are the same down to roundoff noise.
-! BEWARE:
-
! note: these hand optimizations should help compilers to pipeline the code and make better use of the cache;
! depending on compilers, it can further decrease the computation time by ~ 30%.
! the original routines are commented with "! way 1", the hand-optimized routines with "! way 2"
program xspecfem3D
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
+ use specfem_par_movie
implicit none
-! standard include of the MPI library
+ ! standard include of the MPI library
include 'mpif.h'
- include "constants.h"
- include "precision.h"
+ ! local parameters
+ integer :: ier
! include values created by the mesher
- include "OUTPUT_FILES/values_from_mesher.h"
+! include "OUTPUT_FILES/values_from_mesher.h"
!=======================================================================!
! !
@@ -374,531 +373,7 @@
! Its first time derivative is called veloc_outer_core.
! Its second time derivative is called accel_outer_core.
-! memory variables and standard linear solids for attenuation
- real(kind=CUSTOM_REAL), dimension(ATT1,ATT2,ATT3,ATT4) :: one_minus_sum_beta_crust_mantle, factor_scale_crust_mantle
- real(kind=CUSTOM_REAL), dimension(ATT1,ATT2,ATT3,ATT5) :: one_minus_sum_beta_inner_core, factor_scale_inner_core
- real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval, betaval, gammaval
- real(kind=CUSTOM_REAL), dimension(N_SLS,ATT1,ATT2,ATT3,ATT4) :: factor_common_crust_mantle
- real(kind=CUSTOM_REAL), dimension(N_SLS,ATT1,ATT2,ATT3,ATT5) :: factor_common_inner_core
-
- real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUAT) :: R_memory_crust_mantle
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_OR_ATT) :: epsilondev_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STRAIN_ONLY) :: eps_trace_over_3_crust_mantle
-
- real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ATTENUATION) :: R_memory_inner_core
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_OR_ATT) :: epsilondev_inner_core
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STRAIN_ONLY) :: eps_trace_over_3_inner_core
-
-! ADJOINT
- real(kind=CUSTOM_REAL), dimension(N_SLS) :: b_alphaval, b_betaval, b_gammaval
-
- real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_AND_ATT) :: b_R_memory_crust_mantle
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: b_epsilondev_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: b_eps_trace_over_3_crust_mantle
-
- real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_AND_ATT) :: b_R_memory_inner_core
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ADJOINT) :: b_epsilondev_inner_core
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ADJOINT) :: b_eps_trace_over_3_inner_core
-
-! for matching with central cube in inner core
- integer, dimension(:), allocatable :: sender_from_slices_to_cube
- integer, dimension(:,:), allocatable :: ibool_central_cube
- double precision, dimension(:,:), allocatable :: buffer_slices,b_buffer_slices,buffer_slices2
- double precision, dimension(:,:,:), allocatable :: buffer_all_cube_from_slices,b_buffer_all_cube_from_slices
- integer nb_msgs_theor_in_cube,non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices,receiver_cube_from_slices
-
- integer nspec2D_xmin_inner_core,nspec2D_xmax_inner_core,nspec2D_ymin_inner_core,nspec2D_ymax_inner_core
-
-! to save movie frames
- integer nmovie_points,NIT
- real(kind=CUSTOM_REAL), dimension(:), allocatable :: &
- store_val_x,store_val_y,store_val_z, &
- store_val_ux,store_val_uy,store_val_uz
- real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: &
- store_val_x_all,store_val_y_all,store_val_z_all, &
- store_val_ux_all,store_val_uy_all,store_val_uz_all
-
-! to save movie volume
- integer :: npoints_3dmovie,nspecel_3dmovie
- integer, dimension(NGLOB_CRUST_MANTLE) :: num_ibool_3dmovie
- double precision :: scalingval
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_OR_ATT) :: muvstore_crust_mantle_3dmovie
- real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: nu_3dmovie
- logical, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STRAIN_ONLY) :: mask_3dmovie
-
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_OR_ATT) :: Iepsilondev_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STRAIN_ONLY) :: Ieps_trace_over_3_crust_mantle
-
-! use integer array to store values
- integer, dimension(NX_BATHY,NY_BATHY) :: ibathy_topo
-
-! for crust/oceans coupling
- integer, dimension(NSPEC2DMAX_XMIN_XMAX_CM) :: ibelm_xmin_crust_mantle,ibelm_xmax_crust_mantle
- integer, dimension(NSPEC2DMAX_YMIN_YMAX_CM) :: ibelm_ymin_crust_mantle,ibelm_ymax_crust_mantle
- integer, dimension(NSPEC2D_BOTTOM_CM) :: ibelm_bottom_crust_mantle
- integer, dimension(NSPEC2D_TOP_CM) :: ibelm_top_crust_mantle
-
-! additional mass matrix for ocean load
- real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE_OCEANS) :: rmass_ocean_load
-
-! flag to mask ocean-bottom degrees of freedom for ocean load
- logical, dimension(NGLOB_CRUST_MANTLE_OCEANS) :: updated_dof_ocean_load
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_BOTTOM_CM) :: jacobian2D_bottom_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_TOP_CM) :: jacobian2D_top_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ,NSPEC2DMAX_XMIN_XMAX_CM) :: jacobian2D_xmin_crust_mantle,&
- jacobian2D_xmax_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ,NSPEC2DMAX_YMIN_YMAX_CM) :: jacobian2D_ymin_crust_mantle,&
- jacobian2D_ymax_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLY,NGLLZ,NSPEC2DMAX_XMIN_XMAX_CM) :: &
- normal_xmin_crust_mantle,normal_xmax_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2DMAX_YMIN_YMAX_CM) :: &
- normal_ymin_crust_mantle,normal_ymax_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_BOTTOM_CM) :: normal_bottom_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_TOP_CM) :: normal_top_crust_mantle
-
-! Stacey
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STACEY) :: rho_vp_crust_mantle,rho_vs_crust_mantle
- integer nspec2D_xmin_crust_mantle,nspec2D_xmax_crust_mantle,nspec2D_ymin_crust_mantle,nspec2D_ymax_crust_mantle
- integer, dimension(2,NSPEC2DMAX_YMIN_YMAX_CM) :: nimin_crust_mantle,nimax_crust_mantle,nkmin_eta_crust_mantle
- integer, dimension(2,NSPEC2DMAX_XMIN_XMAX_CM) :: njmin_crust_mantle,njmax_crust_mantle,nkmin_xi_crust_mantle
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_STACEY) :: vp_outer_core
- integer nspec2D_xmin_outer_core,nspec2D_xmax_outer_core,nspec2D_ymin_outer_core,nspec2D_ymax_outer_core
- integer, dimension(2,NSPEC2DMAX_YMIN_YMAX_OC) :: nimin_outer_core,nimax_outer_core,nkmin_eta_outer_core
- integer, dimension(2,NSPEC2DMAX_XMIN_XMAX_OC) :: njmin_outer_core,njmax_outer_core,nkmin_xi_outer_core
-
-! arrays to couple with the fluid regions by pointwise matching
- integer, dimension(NSPEC2DMAX_XMIN_XMAX_OC) :: ibelm_xmin_outer_core,ibelm_xmax_outer_core
- integer, dimension(NSPEC2DMAX_YMIN_YMAX_OC) :: ibelm_ymin_outer_core,ibelm_ymax_outer_core
- integer, dimension(NSPEC2D_BOTTOM_OC) :: ibelm_bottom_outer_core
- integer, dimension(NSPEC2D_TOP_OC) :: ibelm_top_outer_core
-
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLY,NGLLZ,NSPEC2DMAX_XMIN_XMAX_OC) :: normal_xmin_outer_core,normal_xmax_outer_core
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLZ,NSPEC2DMAX_YMIN_YMAX_OC) :: normal_ymin_outer_core,normal_ymax_outer_core
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_BOTTOM_OC) :: normal_bottom_outer_core
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_TOP_OC) :: normal_top_outer_core
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_BOTTOM_OC) :: jacobian2D_bottom_outer_core
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_TOP_OC) :: jacobian2D_top_outer_core
- real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ,NSPEC2DMAX_XMIN_XMAX_OC) :: jacobian2D_xmin_outer_core,jacobian2D_xmax_outer_core
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ,NSPEC2DMAX_YMIN_YMAX_OC) :: jacobian2D_ymin_outer_core,jacobian2D_ymax_outer_core
-
-
- integer, dimension(NSPEC2DMAX_XMIN_XMAX_IC) :: ibelm_xmin_inner_core,ibelm_xmax_inner_core
- integer, dimension(NSPEC2DMAX_YMIN_YMAX_IC) :: ibelm_ymin_inner_core,ibelm_ymax_inner_core
- integer, dimension(NSPEC2D_BOTTOM_IC) :: ibelm_bottom_inner_core
- integer, dimension(NSPEC2D_TOP_IC) :: ibelm_top_inner_core
-
-! for ellipticity
- integer nspl
- double precision rspl(NR),espl(NR),espl2(NR)
-
-! for conversion from x y z to r theta phi
- real(kind=CUSTOM_REAL) rval,thetaval,phival
-
-! ---- arrays to assemble between chunks
-
-! communication pattern for faces between chunks
- integer, dimension(NUMMSGS_FACES_VAL) :: iprocfrom_faces,iprocto_faces,imsg_type
-
-! communication pattern for corners between chunks
- integer, dimension(NCORNERSCHUNKS_VAL) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
-
-! indirect addressing for each message for faces and corners of the chunks
-! a given slice can belong to at most one corner and at most two faces
- integer NGLOB2DMAX_XY
- integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED) :: iboolfaces_crust_mantle, &
- iboolfaces_outer_core,iboolfaces_inner_core
-
-! this for non blocking MPI
-
-! buffers for send and receive between faces of the slices and the chunks
-! we use the same buffers to assemble scalars and vectors because vectors are
-! always three times bigger and therefore scalars can use the first part
-! of the vector buffer in memory even if it has an additional index here
- integer :: npoin2D_max_all_CM_IC
- real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: buffer_send_faces,buffer_received_faces, &
- b_buffer_send_faces,b_buffer_received_faces
-
-! for non blocking communications
- logical, dimension(NSPEC_CRUST_MANTLE) :: is_on_a_slice_edge_crust_mantle
- logical, dimension(NSPEC_OUTER_CORE) :: is_on_a_slice_edge_outer_core
- logical, dimension(NSPEC_INNER_CORE) :: is_on_a_slice_edge_inner_core
- logical, dimension(NGLOB_CRUST_MANTLE) :: mask_ibool
- real :: percentage_edge
-
-! assembling phase number for non blocking MPI
-! iphase is for the crust_mantle, outer_core and inner_core regions
-! iphase_CC is for the central cube
- integer :: iphase,iphase_CC,icall
- integer :: b_iphase,b_iphase_CC,b_icall
-
-! -------- arrays specific to each region here -----------
-
-! ----------------- crust, mantle and oceans ---------------------
-
-! mesh parameters
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: ibool_crust_mantle
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle,&
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE) :: &
- xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle
-
-! arrays for isotropic elements stored only where needed to save space
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ISO_MANTLE) :: &
- rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle
-
-! arrays for anisotropic elements stored only where needed to save space
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_TISO_MANTLE) :: &
- kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle
-
-! arrays for full anisotropy only when needed
- integer nspec_iso,nspec_tiso,nspec_ani
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ANISO_MANTLE) :: &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle, &
- c14store_crust_mantle,c15store_crust_mantle,c16store_crust_mantle, &
- c22store_crust_mantle,c23store_crust_mantle,c24store_crust_mantle, &
- c25store_crust_mantle,c26store_crust_mantle,c33store_crust_mantle, &
- c34store_crust_mantle,c35store_crust_mantle,c36store_crust_mantle, &
- c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle
-
-! local to global mapping
-! integer, dimension(NSPEC_CRUST_MANTLE) :: idoubling_crust_mantle
- logical, dimension(NSPEC_CRUST_MANTLE) :: ispec_is_tiso_crust_mantle
-
-! mass matrix
- real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE) :: rmass_crust_mantle
-
-! displacement, velocity, acceleration
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE) :: &
- displ_crust_mantle,veloc_crust_mantle,accel_crust_mantle
-
-! ----------------- outer core ---------------------
-
-! mesh parameters
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE) :: ibool_outer_core
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE) :: &
- xix_outer_core,xiy_outer_core,xiz_outer_core,&
- etax_outer_core,etay_outer_core,etaz_outer_core, &
- gammax_outer_core,gammay_outer_core,gammaz_outer_core
- real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE) :: &
- xstore_outer_core,ystore_outer_core,zstore_outer_core
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE) :: &
- rhostore_outer_core,kappavstore_outer_core
-
-! local to global mapping
- integer, dimension(NSPEC_OUTER_CORE) :: idoubling_outer_core
- logical, dimension(NSPEC_OUTER_CORE) :: ispec_is_tiso_outer_core ! only needed for compute_boundary_kernel()
-
-! mass matrix
- real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE) :: rmass_outer_core
-
-! velocity potential
- real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE) :: displ_outer_core, &
- veloc_outer_core,accel_outer_core
-
-! ----------------- inner core ---------------------
-
-! mesh parameters
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: ibool_inner_core
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: &
- xix_inner_core,xiy_inner_core,xiz_inner_core,&
- etax_inner_core,etay_inner_core,etaz_inner_core, &
- gammax_inner_core,gammay_inner_core,gammaz_inner_core, &
- rhostore_inner_core, kappavstore_inner_core,muvstore_inner_core
- real(kind=CUSTOM_REAL), dimension(NGLOB_INNER_CORE) :: &
- xstore_inner_core,ystore_inner_core,zstore_inner_core
-
-! arrays for inner-core anisotropy only when needed
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ANISO_IC) :: &
- c11store_inner_core,c33store_inner_core,c12store_inner_core, &
- c13store_inner_core,c44store_inner_core
-
-! local to global mapping
- integer, dimension(NSPEC_INNER_CORE) :: idoubling_inner_core
- logical, dimension(NSPEC_INNER_CORE) :: ispec_is_tiso_inner_core ! only needed for computer_boundary_kernel() routine
-
-! mass matrix
- real(kind=CUSTOM_REAL), dimension(NGLOB_INNER_CORE) :: rmass_inner_core
-
-! displacement, velocity, acceleration
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE) :: &
- displ_inner_core,veloc_inner_core,accel_inner_core
-
-! Newmark time scheme parameters and non-dimensionalization
- real(kind=CUSTOM_REAL) time,deltat,deltatover2,deltatsqover2
- double precision scale_t,scale_t_inv,scale_displ,scale_veloc
-
-! ADJOINT
- real(kind=CUSTOM_REAL) b_deltat,b_deltatover2,b_deltatsqover2
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE_ADJOINT) :: &
- b_displ_crust_mantle,b_veloc_crust_mantle,b_accel_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE_ADJOINT) :: &
- b_displ_outer_core,b_veloc_outer_core,b_accel_outer_core
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE_ADJOINT) :: &
- b_displ_inner_core,b_veloc_inner_core,b_accel_inner_core
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ADJOINT) :: div_displ_outer_core
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ADJOINT) :: b_div_displ_outer_core
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: rho_kl_crust_mantle, &
- beta_kl_crust_mantle, alpha_kl_crust_mantle, Sigma_kl_crust_mantle
-! For anisotropic kernels (see compute_kernels.f90 for a definition of the array)
- real(kind=CUSTOM_REAL), dimension(21,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: cijkl_kl_crust_mantle
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ADJOINT) :: rho_kl_outer_core, &
- alpha_kl_outer_core
-
- ! approximate hessian
- real(kind=CUSTOM_REAL), dimension(:,:,:,:),allocatable :: hess_kl_crust_mantle
-
- ! check for deviatoric kernel for outer core region
- real(kind=CUSTOM_REAL), dimension(:,:,:,:),allocatable :: beta_kl_outer_core
- integer :: nspec_beta_kl_outer_core
- logical,parameter:: deviatoric_outercore = .false.
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ADJOINT) :: rho_kl_inner_core, &
- beta_kl_inner_core, alpha_kl_inner_core
-
- real(kind=CUSTOM_REAL), dimension(:,:,:,:,:), allocatable :: absorb_xmin_crust_mantle5, &
- absorb_xmax_crust_mantle5, absorb_ymin_crust_mantle5, absorb_ymax_crust_mantle5
-
- real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: absorb_xmin_outer_core, &
- absorb_xmax_outer_core, absorb_ymin_outer_core, absorb_ymax_outer_core, &
- absorb_zmin_outer_core
- integer nabs_xmin_cm,nabs_xmax_cm,nabs_ymin_cm,nabs_ymax_cm
- integer nabs_xmin_oc,nabs_xmax_oc,nabs_ymin_oc,nabs_ymax_oc,nabs_zmin_oc
-
- integer reclen_xmin_crust_mantle, reclen_xmax_crust_mantle, reclen_ymin_crust_mantle, &
- reclen_ymax_crust_mantle, reclen_xmin_outer_core, reclen_xmax_outer_core,&
- reclen_ymin_outer_core, reclen_ymax_outer_core, reclen_zmin
-
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_OUTER_CORE) :: vector_accel_outer_core,&
- vector_displ_outer_core, b_vector_displ_outer_core
-
- integer npoin2D_faces_crust_mantle(NUMFACES_SHARED)
- integer npoin2D_faces_outer_core(NUMFACES_SHARED)
- integer npoin2D_faces_inner_core(NUMFACES_SHARED)
-
-! parameters for the source
- integer it
- integer, dimension(:), allocatable :: islice_selected_source,ispec_selected_source
- real(kind=CUSTOM_REAL), dimension(:,:,:,:,:), allocatable :: sourcearrays
- double precision, dimension(:,:,:) ,allocatable:: nu_source
- double precision sec
- double precision, dimension(:), allocatable :: Mxx,Myy,Mzz,Mxy,Mxz,Myz
- double precision, dimension(:), allocatable :: xi_source,eta_source,gamma_source
- double precision, dimension(:), allocatable :: tshift_cmt,hdur,hdur_gaussian
- double precision, dimension(:), allocatable :: theta_source,phi_source
- double precision, external :: comp_source_time_function
- double precision t0
-
-! receiver information
- integer nrec,nrec_local
- integer, dimension(:), allocatable :: islice_selected_rec,ispec_selected_rec,number_receiver_global
- double precision, dimension(:), allocatable :: xi_receiver,eta_receiver,gamma_receiver
- character(len=150) :: STATIONS,rec_filename
- double precision, dimension(:,:,:), allocatable :: nu
- double precision, allocatable, dimension(:) :: stlat,stlon,stele,stbur
- character(len=MAX_LENGTH_STATION_NAME), dimension(:), allocatable :: station_name
- character(len=MAX_LENGTH_NETWORK_NAME), dimension(:), allocatable :: network_name
-
-!ADJOINT
- real(kind=CUSTOM_REAL), dimension(:,:,:,:,:,:), allocatable :: adj_sourcearrays
- integer nrec_simulation, nadj_rec_local
- integer NSTEP_SUB_ADJ ! to read input in chunks
- integer, dimension(:,:), allocatable :: iadjsrc ! to read input in chunks
- integer, dimension(:), allocatable :: iadjsrc_len,iadj_vec
-! source frechet derivatives
- real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: moment_der
- real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: sloc_der
- real(kind=CUSTOM_REAL), dimension(:), allocatable :: stshift_der, shdur_der
- double precision, dimension(:,:), allocatable :: hpxir_store,hpetar_store,hpgammar_store
- integer :: nadj_hprec_local
-
-! seismograms
- integer it_begin,it_end,nit_written
- real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: seismograms
- integer :: seismo_offset, seismo_current
-
-! non-dimensionalized rotation rate of the Earth times two
- real(kind=CUSTOM_REAL) two_omega_earth
-
-! for the Euler scheme for rotation
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ROTATION) :: &
- A_array_rotation,B_array_rotation
-
-! number of faces between chunks
- integer NUMMSGS_FACES
-
-! number of corners between chunks
- integer NCORNERSCHUNKS
-
-! number of message types
- integer NUM_MSG_TYPES
-
-! indirect addressing for each corner of the chunks
- integer, dimension(NGLOB1D_RADIAL_CM,NUMCORNERS_SHARED) :: iboolcorner_crust_mantle
- integer, dimension(NGLOB1D_RADIAL_OC,NUMCORNERS_SHARED) :: iboolcorner_outer_core
- integer, dimension(NGLOB1D_RADIAL_IC,NUMCORNERS_SHARED) :: iboolcorner_inner_core
-
-! buffers for send and receive between corners of the chunks
- real(kind=CUSTOM_REAL), dimension(NGLOB1D_RADIAL_CM) :: buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
- b_buffer_send_chunkcorn_scalar,b_buffer_recv_chunkcorn_scalar
-! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB1D_RADIAL_CM + NGLOB1D_RADIAL_IC) :: &
- buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector, &
- b_buffer_send_chunkcorn_vector,b_buffer_recv_chunkcorn_vector
-
-! Gauss-Lobatto-Legendre points of integration and weights
- double precision, dimension(NGLLX) :: xigll,wxgll
- double precision, dimension(NGLLY) :: yigll,wygll
- double precision, dimension(NGLLZ) :: zigll,wzgll
-
-! product of weights for gravity term
- double precision, dimension(NGLLX,NGLLY,NGLLZ) :: wgll_cube
-
-! array with derivatives of Lagrange polynomials and precalculated products
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprimewgll_xx
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xxT,hprimewgll_xxT
- real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLY) :: hprime_yy,hprimewgll_yy
- real(kind=CUSTOM_REAL), dimension(NGLLZ,NGLLZ) :: hprime_zz,hprimewgll_zz
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
- real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
-
-! Lagrange interpolators at receivers
- double precision, dimension(:,:), allocatable :: hxir_store,hetar_store,hgammar_store
-
-! 2-D addressing and buffers for summation between slices
- integer, dimension(NGLOB2DMAX_XMIN_XMAX_CM) :: iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle
- integer, dimension(NGLOB2DMAX_YMIN_YMAX_CM) :: iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle
-
- integer, dimension(NGLOB2DMAX_XMIN_XMAX_OC) :: iboolleft_xi_outer_core,iboolright_xi_outer_core
- integer, dimension(NGLOB2DMAX_YMIN_YMAX_OC) :: iboolleft_eta_outer_core,iboolright_eta_outer_core
-
- integer, dimension(NGLOB2DMAX_XMIN_XMAX_IC) :: iboolleft_xi_inner_core,iboolright_xi_inner_core
- integer, dimension(NGLOB2DMAX_YMIN_YMAX_IC) :: iboolleft_eta_inner_core,iboolright_eta_inner_core
-
-! for addressing of the slices
- integer, dimension(NCHUNKS_VAL,0:NPROC_XI_VAL-1,0:NPROC_ETA_VAL-1) :: addressing
- integer, dimension(0:NPROCTOT_VAL-1) :: ichunk_slice,iproc_xi_slice,iproc_eta_slice
-
-! proc numbers for MPI
- integer myrank
-
- integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle
- integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_outer_core,npoin2D_eta_outer_core
- integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_inner_core,npoin2D_eta_inner_core
-
- integer ichunk,iproc_xi,iproc_eta
-
-!ADJOINT
- real(kind=CUSTOM_REAL) b_two_omega_earth
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ROT_ADJOINT) :: &
- b_A_array_rotation,b_B_array_rotation
-
- double precision :: time_start
-
-! parameters read from parameter file
- integer MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD,NER_CRUST, &
- NER_80_MOHO,NER_220_80,NER_400_220,NER_600_400,NER_670_600,NER_771_670, &
- NER_TOPDDOUBLEPRIME_771,NER_CMB_TOPDDOUBLEPRIME,NER_OUTER_CORE, &
- NER_TOP_CENTRAL_CUBE_ICB,NEX_XI,NEX_ETA, &
- NTSTEP_BETWEEN_OUTPUT_SEISMOS,&
- NTSTEP_BETWEEN_READ_ADJSRC,NSTEP,NSOURCES,NTSTEP_BETWEEN_FRAMES, &
- NTSTEP_BETWEEN_OUTPUT_INFO,NUMBER_OF_RUNS,NUMBER_OF_THIS_RUN,SIMULATION_TYPE, &
- MOVIE_VOLUME_TYPE,MOVIE_START,MOVIE_STOP,NOISE_TOMOGRAPHY
-
- double precision DT,ROCEAN,RMIDDLE_CRUST, &
- RMOHO,R80,R220,R400,R600,R670,R771,RTOPDDOUBLEPRIME,RCMB,RICB, &
- RHO_TOP_OC,RHO_BOTTOM_OC,RHO_OCEANS,HDUR_MOVIE, &
- MOVIE_TOP,MOVIE_BOTTOM,MOVIE_WEST,MOVIE_EAST,MOVIE_NORTH,MOVIE_SOUTH, &
- ANGULAR_WIDTH_XI_IN_DEGREES
-
- logical ONE_CRUST,TOPOGRAPHY,MOVIE_SURFACE,MOVIE_VOLUME,MOVIE_COARSE, &
- RECEIVERS_CAN_BE_BURIED,PRINT_SOURCE_TIME_FUNCTION, &
- SAVE_MESH_FILES,ABSORBING_CONDITIONS,INCLUDE_CENTRAL_CUBE,SAVE_FORWARD, &
- OUTPUT_SEISMOS_ASCII_TEXT,OUTPUT_SEISMOS_SAC_ALPHANUM,OUTPUT_SEISMOS_SAC_BINARY, &
- ROTATE_SEISMOGRAMS_RT,HONOR_1D_SPHERICAL_MOHO,WRITE_SEISMOGRAMS_BY_MASTER,&
- SAVE_ALL_SEISMOS_IN_ONE_FILE,USE_BINARY_FOR_LARGE_FILE
-
- character(len=150) OUTPUT_FILES,LOCAL_PATH,MODEL
-
-! logical COMPUTE_AND_STORE_STRAIN
-
-! for SAC headers for seismograms
- integer yr_SAC,jda_SAC,ho_SAC,mi_SAC
- real mb_SAC
- double precision t_cmt_SAC,t_shift_SAC,elat_SAC,elon_SAC,depth_SAC, &
- cmt_lat_SAC,cmt_lon_SAC,cmt_depth_SAC,cmt_hdur_SAC,sec_SAC
- character(len=20) event_name_SAC
-
-! this for all the regions
- integer, dimension(MAX_NUM_REGIONS) :: NSPEC2DMAX_XMIN_XMAX,NSPEC2DMAX_YMIN_YMAX, &
- NSPEC2D_BOTTOM,NSPEC2D_TOP, &
- NGLOB1D_RADIAL, &
- NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX
-
- character(len=150) prname
-
-!daniel: debugging
-! character(len=256) :: filename
-! logical, parameter :: SNAPSHOT_INNER_CORE = .true.
-
-! lookup table every km for gravity
- real(kind=CUSTOM_REAL) minus_g_cmb,minus_g_icb
- double precision, dimension(NRAD_GRAVITY) :: minus_gravity_table, &
- minus_deriv_gravity_table,density_table,d_ln_density_dr_table,minus_rho_g_over_kappa_fluid
-
-! dummy array that does not need to be actually read
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,1) :: dummy_array
-
-! computed in read_compute_parameters
- integer, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: ner,ratio_sampling_array
- integer, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: doubling_index
- double precision, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: r_bottom,r_top
- logical, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: this_region_has_a_doubling
- double precision, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: rmins,rmaxs
-
-! Boundary Mesh and Kernels
- integer k_top,k_bot,iregion_code
- integer, dimension(NSPEC2D_MOHO) :: ibelm_moho_top,ibelm_moho_bot
- integer, dimension(NSPEC2D_400) :: ibelm_400_top,ibelm_400_bot
- integer, dimension(NSPEC2D_670) :: ibelm_670_top,ibelm_670_bot
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_MOHO) :: normal_moho
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_400) :: normal_400
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_670) :: normal_670
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_MOHO) :: moho_kl, moho_kl_top, moho_kl_bot
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_400) :: d400_kl, d400_kl_top, d400_kl_bot
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_670) :: d670_kl, d670_kl_top, d670_kl_bot
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_CMB) :: cmb_kl, cmb_kl_top, cmb_kl_bot
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_ICB) :: icb_kl, icb_kl_top, icb_kl_bot
- logical :: fluid_solid_boundary
-
- integer :: i,ier
-
-! NOISE_TOMOGRAPHY
- real(kind=CUSTOM_REAL), dimension(:,:,:,:,:), allocatable :: noise_sourcearray
- real(kind=CUSTOM_REAL), dimension(:), allocatable :: &
- normal_x_noise,normal_y_noise,normal_z_noise, mask_noise
- real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: noise_surface_movie
- integer :: irec_master_noise
-
-#ifdef _HANDOPT
- integer :: imodulo_NGLOB_CRUST_MANTLE,imodulo_NGLOB_CRUST_MANTLE4, &
- imodulo_NGLOB_INNER_CORE
-#endif
-
! ************** PROGRAM STARTS HERE **************
!
!-------------------------------------------------------------------------------------------------
@@ -933,7 +408,7 @@
! passing them along as arguments to the routine makes the code slower.
! it seems that this stack/heap criterion is more complicated.
!
-! another reason why modules are avoided is to make the code thread safe.
+! another reason why the use of modules is restricted is to make the code thread safe.
! having different threads access the same data structure and modifying it at the same time
! would lead to problems. passing arguments is a way to avoid such complications.
!
@@ -970,3673 +445,29 @@
call force_ftz()
! initializes simulation parameters
- call initialize_simulation(myrank,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD,NER_CRUST, &
- NER_80_MOHO,NER_220_80,NER_400_220,NER_600_400,NER_670_600,NER_771_670, &
- NER_TOPDDOUBLEPRIME_771,NER_CMB_TOPDDOUBLEPRIME,NER_OUTER_CORE, &
- NER_TOP_CENTRAL_CUBE_ICB,ANGULAR_WIDTH_XI_IN_DEGREES,NEX_XI,NEX_ETA, &
- NTSTEP_BETWEEN_OUTPUT_SEISMOS, &
- NTSTEP_BETWEEN_READ_ADJSRC,NSTEP,NSOURCES,NTSTEP_BETWEEN_FRAMES, &
- NTSTEP_BETWEEN_OUTPUT_INFO,NUMBER_OF_RUNS,NUMBER_OF_THIS_RUN,SIMULATION_TYPE, &
- DT,ROCEAN,RMIDDLE_CRUST,RMOHO,R80,R220,R400,R600,R670,R771,&
- RTOPDDOUBLEPRIME,RCMB,RICB, &
- RHO_TOP_OC,RHO_BOTTOM_OC,RHO_OCEANS, &
- MOVIE_VOLUME_TYPE,MOVIE_START,MOVIE_STOP, &
- HDUR_MOVIE,MOVIE_TOP,MOVIE_BOTTOM,MOVIE_WEST,MOVIE_EAST, &
- MOVIE_NORTH,MOVIE_SOUTH,MOVIE_SURFACE,MOVIE_VOLUME, &
- RECEIVERS_CAN_BE_BURIED,PRINT_SOURCE_TIME_FUNCTION, &
- SAVE_MESH_FILES,ABSORBING_CONDITIONS,INCLUDE_CENTRAL_CUBE,SAVE_FORWARD, &
- SAVE_ALL_SEISMOS_IN_ONE_FILE,MOVIE_COARSE,OUTPUT_SEISMOS_ASCII_TEXT, &
- OUTPUT_SEISMOS_SAC_ALPHANUM,OUTPUT_SEISMOS_SAC_BINARY, &
- ROTATE_SEISMOGRAMS_RT,WRITE_SEISMOGRAMS_BY_MASTER,USE_BINARY_FOR_LARGE_FILE, &
- LOCAL_PATH,MODEL,OUTPUT_FILES, &
- ratio_sampling_array, ner, doubling_index,r_bottom,r_top, &
- this_region_has_a_doubling,rmins,rmaxs, &
- TOPOGRAPHY,HONOR_1D_SPHERICAL_MOHO,ONE_CRUST, &
- nspl,rspl,espl,espl2,ibathy_topo, &
- NSPEC2DMAX_XMIN_XMAX,NSPEC2DMAX_YMIN_YMAX,NSPEC2D_BOTTOM,NSPEC2D_TOP, &
- NGLOB1D_RADIAL,NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX, &
- xigll,yigll,zigll,wxgll,wygll,wzgll,wgll_cube, &
- hprime_xx,hprime_yy,hprime_zz,hprime_xxT, &
- hprimewgll_xx,hprimewgll_yy,hprimewgll_zz,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- rec_filename,STATIONS,nrec,NOISE_TOMOGRAPHY)
-!
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!
-! starts reading the databases
- call read_mesh_databases(myrank,rho_vp_crust_mantle,rho_vs_crust_mantle, &
- xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
- rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
- kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- nspec_iso,nspec_tiso,nspec_ani, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle, &
- c14store_crust_mantle,c15store_crust_mantle,c16store_crust_mantle, &
- c22store_crust_mantle,c23store_crust_mantle,c24store_crust_mantle, &
- c25store_crust_mantle,c26store_crust_mantle,c33store_crust_mantle, &
- c34store_crust_mantle,c35store_crust_mantle,c36store_crust_mantle, &
- c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- ! -- idoubling_crust_mantle,
- is_on_a_slice_edge_crust_mantle,rmass_crust_mantle,rmass_ocean_load, &
- vp_outer_core,xstore_outer_core,ystore_outer_core,zstore_outer_core, &
- xix_outer_core,xiy_outer_core,xiz_outer_core, &
- etax_outer_core,etay_outer_core,etaz_outer_core, &
- gammax_outer_core,gammay_outer_core,gammaz_outer_core, &
- rhostore_outer_core,kappavstore_outer_core, &
- ibool_outer_core,idoubling_outer_core,ispec_is_tiso_outer_core, &
- is_on_a_slice_edge_outer_core,rmass_outer_core, &
- xstore_inner_core,ystore_inner_core,zstore_inner_core, &
- xix_inner_core,xiy_inner_core,xiz_inner_core, &
- etax_inner_core,etay_inner_core,etaz_inner_core, &
- gammax_inner_core,gammay_inner_core,gammaz_inner_core, &
- rhostore_inner_core,kappavstore_inner_core,muvstore_inner_core, &
- c11store_inner_core,c12store_inner_core,c13store_inner_core, &
- c33store_inner_core,c44store_inner_core, &
- ibool_inner_core,idoubling_inner_core,ispec_is_tiso_inner_core, &
- is_on_a_slice_edge_inner_core,rmass_inner_core, &
- ABSORBING_CONDITIONS,LOCAL_PATH)
+ call initialize_simulation()
- ! read 2-D addressing for summation between slices with MPI
- call read_mesh_databases_addressing(myrank, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle, &
- iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,npoin2D_faces_crust_mantle, &
- iboolcorner_crust_mantle, &
- iboolleft_xi_outer_core,iboolright_xi_outer_core, &
- iboolleft_eta_outer_core,iboolright_eta_outer_core, &
- npoin2D_xi_outer_core,npoin2D_eta_outer_core,&
- iboolfaces_outer_core,npoin2D_faces_outer_core, &
- iboolcorner_outer_core, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core, &
- iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,npoin2D_faces_inner_core, &
- iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces,imsg_type, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- LOCAL_PATH,OUTPUT_FILES, &
- NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX,NGLOB1D_RADIAL, &
- NGLOB2DMAX_XY,NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
- addressing,ichunk_slice,iproc_xi_slice,iproc_eta_slice, &
- ichunk,iproc_xi,iproc_eta)
+ ! starts reading the databases
+ call read_mesh_databases()
- ! to couple mantle with outer core
- call read_mesh_databases_coupling(myrank, &
- nspec2D_xmin_crust_mantle,nspec2D_xmax_crust_mantle, &
- nspec2D_ymin_crust_mantle,nspec2D_ymax_crust_mantle, &
- ibelm_xmin_crust_mantle,ibelm_xmax_crust_mantle,ibelm_ymin_crust_mantle, &
- ibelm_ymax_crust_mantle,ibelm_bottom_crust_mantle,ibelm_top_crust_mantle, &
- normal_xmin_crust_mantle,normal_xmax_crust_mantle,normal_ymin_crust_mantle, &
- normal_ymax_crust_mantle,normal_bottom_crust_mantle,normal_top_crust_mantle, &
- jacobian2D_xmin_crust_mantle,jacobian2D_xmax_crust_mantle,jacobian2D_ymin_crust_mantle, &
- jacobian2D_ymax_crust_mantle,jacobian2D_bottom_crust_mantle,jacobian2D_top_crust_mantle, &
- nspec2D_xmin_outer_core,nspec2D_xmax_outer_core, &
- nspec2D_ymin_outer_core,nspec2D_ymax_outer_core, &
- ibelm_xmin_outer_core,ibelm_xmax_outer_core,ibelm_ymin_outer_core, &
- ibelm_ymax_outer_core,ibelm_bottom_outer_core,ibelm_top_outer_core, &
- normal_xmin_outer_core,normal_xmax_outer_core,normal_ymin_outer_core, &
- normal_ymax_outer_core,normal_bottom_outer_core,normal_top_outer_core, &
- jacobian2D_xmin_outer_core,jacobian2D_xmax_outer_core,jacobian2D_ymin_outer_core, &
- jacobian2D_ymax_outer_core,jacobian2D_bottom_outer_core,jacobian2D_top_outer_core, &
- nspec2D_xmin_inner_core,nspec2D_xmax_inner_core, &
- nspec2D_ymin_inner_core,nspec2D_ymax_inner_core, &
- ibelm_xmin_inner_core,ibelm_xmax_inner_core,ibelm_ymin_inner_core, &
- ibelm_ymax_inner_core,ibelm_bottom_inner_core,ibelm_top_inner_core, &
- ibelm_moho_top,ibelm_moho_bot,ibelm_400_top,ibelm_400_bot, &
- ibelm_670_top,ibelm_670_bot,normal_moho,normal_400,normal_670, &
- k_top,k_bot,moho_kl,d400_kl,d670_kl,cmb_kl,icb_kl, &
- LOCAL_PATH,SIMULATION_TYPE)
+ ! sets up reference element GLL points/weights/derivatives
+ call setup_GLL_points()
-! added this to reduce the size of the buffers
-! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
- npoin2D_max_all_CM_IC = max(maxval(npoin2D_xi_crust_mantle(:) + npoin2D_xi_inner_core(:)), &
- maxval(npoin2D_eta_crust_mantle(:) + npoin2D_eta_inner_core(:)))
+ ! reads topography & bathymetry & ellipticity
+ call read_topography_bathymetry()
- allocate(buffer_send_faces(NDIM,npoin2D_max_all_CM_IC,NUMFACES_SHARED), &
- buffer_received_faces(NDIM,npoin2D_max_all_CM_IC,NUMFACES_SHARED),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating mpi buffer')
+ ! prepares sources and receivers
+ call setup_sources_receivers()
- allocate(b_buffer_send_faces(NDIM,npoin2D_max_all_CM_IC,NUMFACES_SHARED), &
- b_buffer_received_faces(NDIM,npoin2D_max_all_CM_IC,NUMFACES_SHARED),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating mpi b_buffer')
+ ! sets up and precomputes simulation arrays
+ call prepare_timerun()
- call fix_non_blocking_slices(is_on_a_slice_edge_crust_mantle,iboolright_xi_crust_mantle, &
- iboolleft_xi_crust_mantle,iboolright_eta_crust_mantle,iboolleft_eta_crust_mantle, &
- npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle,ibool_crust_mantle, &
- mask_ibool,NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE,NGLOB2DMAX_XMIN_XMAX_CM,NGLOB2DMAX_YMIN_YMAX_CM)
+ ! steps through time iterations
+ call iterate_time()
- call fix_non_blocking_slices(is_on_a_slice_edge_outer_core,iboolright_xi_outer_core, &
- iboolleft_xi_outer_core,iboolright_eta_outer_core,iboolleft_eta_outer_core, &
- npoin2D_xi_outer_core,npoin2D_eta_outer_core,ibool_outer_core, &
- mask_ibool,NSPEC_OUTER_CORE,NGLOB_OUTER_CORE,NGLOB2DMAX_XMIN_XMAX_OC,NGLOB2DMAX_YMIN_YMAX_OC)
+ ! saves last time frame and finishes kernel calculations
+ call finalize_simulation()
- call fix_non_blocking_slices(is_on_a_slice_edge_inner_core,iboolright_xi_inner_core, &
- iboolleft_xi_inner_core,iboolright_eta_inner_core,iboolleft_eta_inner_core, &
- npoin2D_xi_inner_core,npoin2D_eta_inner_core,ibool_inner_core, &
- mask_ibool,NSPEC_INNER_CORE,NGLOB_INNER_CORE,NGLOB2DMAX_XMIN_XMAX_IC,NGLOB2DMAX_YMIN_YMAX_IC)
-
- ! absorbing boundaries
- if(ABSORBING_CONDITIONS) then
- ! crust_mantle
- if (nspec2D_xmin_crust_mantle > 0 .and. (SIMULATION_TYPE == 3 &
- .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
- nabs_xmin_cm = nspec2D_xmin_crust_mantle
- else
- nabs_xmin_cm = 1
- endif
- allocate(absorb_xmin_crust_mantle5(NDIM,NGLLY,NGLLZ,nabs_xmin_cm,8),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating absorb xmin')
-
- if (nspec2D_xmax_crust_mantle > 0 .and. (SIMULATION_TYPE == 3 &
- .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
- nabs_xmax_cm = nspec2D_xmax_crust_mantle
- else
- nabs_xmax_cm = 1
- endif
- allocate(absorb_xmax_crust_mantle5(NDIM,NGLLY,NGLLZ,nabs_xmax_cm,8),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating absorb xmax')
-
- if (nspec2D_ymin_crust_mantle > 0 .and. (SIMULATION_TYPE == 3 &
- .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
- nabs_ymin_cm = nspec2D_ymin_crust_mantle
- else
- nabs_ymin_cm = 1
- endif
- allocate(absorb_ymin_crust_mantle5(NDIM,NGLLX,NGLLZ,nabs_ymin_cm,8),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating absorb ymin')
-
- if (nspec2D_ymax_crust_mantle > 0 .and. (SIMULATION_TYPE == 3 &
- .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
- nabs_ymax_cm = nspec2D_ymax_crust_mantle
- else
- nabs_ymax_cm = 1
- endif
- allocate(absorb_ymax_crust_mantle5(NDIM,NGLLX,NGLLZ,nabs_ymax_cm,8),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating absorb ymax')
-
- ! outer_core
- if (nspec2D_xmin_outer_core > 0 .and. (SIMULATION_TYPE == 3 &
- .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
- nabs_xmin_oc = nspec2D_xmin_outer_core
- else
- nabs_xmin_oc = 1
- endif
- allocate(absorb_xmin_outer_core(NGLLY,NGLLZ,nabs_xmin_oc),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating absorb xmin')
-
- if (nspec2D_xmax_outer_core > 0 .and. (SIMULATION_TYPE == 3 &
- .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
- nabs_xmax_oc = nspec2D_xmax_outer_core
- else
- nabs_xmax_oc = 1
- endif
- allocate(absorb_xmax_outer_core(NGLLY,NGLLZ,nabs_xmax_oc),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating absorb xmax')
-
- if (nspec2D_ymin_outer_core > 0 .and. (SIMULATION_TYPE == 3 &
- .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
- nabs_ymin_oc = nspec2D_ymin_outer_core
- else
- nabs_ymin_oc = 1
- endif
- allocate(absorb_ymin_outer_core(NGLLX,NGLLZ,nabs_ymin_oc),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating absorb ymin')
-
- if (nspec2D_ymax_outer_core > 0 .and. (SIMULATION_TYPE == 3 &
- .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
- nabs_ymax_oc = nspec2D_ymax_outer_core
- else
- nabs_ymax_oc = 1
- endif
- allocate(absorb_ymax_outer_core(NGLLX,NGLLZ,nabs_ymax_oc),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating absorb ymax')
-
- if (NSPEC2D_BOTTOM(IREGION_OUTER_CORE) > 0 .and. &
- (SIMULATION_TYPE == 3 .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
- nabs_zmin_oc = NSPEC2D_BOTTOM(IREGION_OUTER_CORE)
- else
- nabs_zmin_oc = 1
- endif
- allocate(absorb_zmin_outer_core(NGLLX,NGLLY,nabs_zmin_oc),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating absorb zmin')
-
- ! read arrays for Stacey conditions
- call read_mesh_databases_stacey(myrank, &
- nimin_crust_mantle,nimax_crust_mantle,njmin_crust_mantle, &
- njmax_crust_mantle,nkmin_xi_crust_mantle,nkmin_eta_crust_mantle, &
- nspec2D_xmin_crust_mantle,nspec2D_xmax_crust_mantle, &
- nspec2D_ymin_crust_mantle,nspec2D_ymax_crust_mantle, &
- reclen_xmin_crust_mantle,reclen_xmax_crust_mantle, &
- reclen_ymin_crust_mantle,reclen_ymax_crust_mantle, &
- nimin_outer_core,nimax_outer_core,njmin_outer_core, &
- njmax_outer_core,nkmin_xi_outer_core,nkmin_eta_outer_core, &
- nspec2D_xmin_outer_core,nspec2D_xmax_outer_core, &
- nspec2D_ymin_outer_core,nspec2D_ymax_outer_core, &
- reclen_xmin_outer_core,reclen_xmax_outer_core, &
- reclen_ymin_outer_core,reclen_ymax_outer_core, &
- reclen_zmin,NSPEC2D_BOTTOM, &
- SIMULATION_TYPE,SAVE_FORWARD,LOCAL_PATH,NSTEP)
-
- endif
-
-!
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!
-! source and receivers
-
- ! allocate arrays for source
- allocate(islice_selected_source(NSOURCES), &
- ispec_selected_source(NSOURCES), &
- Mxx(NSOURCES), &
- Myy(NSOURCES), &
- Mzz(NSOURCES), &
- Mxy(NSOURCES), &
- Mxz(NSOURCES), &
- Myz(NSOURCES), &
- xi_source(NSOURCES), &
- eta_source(NSOURCES), &
- gamma_source(NSOURCES), &
- tshift_cmt(NSOURCES), &
- hdur(NSOURCES), &
- hdur_gaussian(NSOURCES), &
- theta_source(NSOURCES), &
- phi_source(NSOURCES), &
- nu_source(NDIM,NDIM,NSOURCES),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating source arrays')
-
- ! allocate memory for receiver arrays
- allocate(islice_selected_rec(nrec), &
- ispec_selected_rec(nrec), &
- xi_receiver(nrec), &
- eta_receiver(nrec), &
- gamma_receiver(nrec), &
- station_name(nrec), &
- network_name(nrec), &
- stlat(nrec), &
- stlon(nrec), &
- stele(nrec), &
- stbur(nrec), &
- nu(NDIM,NDIM,nrec),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating receiver arrays')
-
- ! locates sources and receivers
- call setup_sources_receivers(NSOURCES,myrank,ibool_crust_mantle, &
- xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
- xigll,yigll,zigll,TOPOGRAPHY, &
- sec,tshift_cmt,theta_source,phi_source, &
- NSTEP,DT,hdur,hdur_gaussian,t0,Mxx,Myy,Mzz,Mxy,Mxz,Myz, &
- islice_selected_source,ispec_selected_source, &
- xi_source,eta_source,gamma_source,nu_source, &
- rspl,espl,espl2,nspl,ibathy_topo,NEX_XI,PRINT_SOURCE_TIME_FUNCTION, &
- rec_filename,nrec,islice_selected_rec,ispec_selected_rec, &
- xi_receiver,eta_receiver,gamma_receiver,station_name,network_name, &
- stlat,stlon,stele,stbur,nu, &
- nrec_local,nadj_rec_local,nrec_simulation, &
- SIMULATION_TYPE,RECEIVERS_CAN_BE_BURIED,MOVIE_SURFACE,MOVIE_VOLUME, &
- HDUR_MOVIE,OUTPUT_FILES,LOCAL_PATH)
-
- ! allocates source arrays
- if (SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) then
- allocate(sourcearrays(NDIM,NGLLX,NGLLY,NGLLZ,NSOURCES),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating sourcearrays')
-
- ! stores source arrays
- call setup_sources_receivers_srcarr(NSOURCES,myrank, &
- ispec_selected_source,islice_selected_source, &
- xi_source,eta_source,gamma_source, &
- Mxx,Myy,Mzz,Mxy,Mxz,Myz, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
- xigll,yigll,zigll,sourcearrays)
- endif
-
-
- if (SIMULATION_TYPE == 2 .or. SIMULATION_TYPE == 3) then
- NSTEP_SUB_ADJ = ceiling( dble(NSTEP)/dble(NTSTEP_BETWEEN_READ_ADJSRC) )
- allocate(iadj_vec(NSTEP),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating iadj_vec')
-
- ! initializes iadj_vec
- do it=1,NSTEP
- iadj_vec(it) = NSTEP-it+1 ! default is for reversing entire record
- enddo
-
- if(nadj_rec_local > 0) then
- ! allocate adjoint source arrays
- allocate(adj_sourcearrays(NDIM,NGLLX,NGLLY,NGLLZ,nadj_rec_local,NTSTEP_BETWEEN_READ_ADJSRC),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating adjoint sourcearrays')
- adj_sourcearrays(:,:,:,:,:,:) = 0._CUSTOM_REAL
-
- ! allocate indexing arrays
- allocate(iadjsrc(NSTEP_SUB_ADJ,2), &
- iadjsrc_len(NSTEP_SUB_ADJ),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating adjoint indexing arrays')
- ! initializes iadjsrc, iadjsrc_len and iadj_vec
- call setup_sources_receivers_adjindx(NSTEP,NSTEP_SUB_ADJ, &
- NTSTEP_BETWEEN_READ_ADJSRC, &
- iadjsrc,iadjsrc_len,iadj_vec)
- endif
- endif
-
- ! allocates receiver interpolators
- if (nrec_local > 0) then
- ! allocate Lagrange interpolators for receivers
- allocate(hxir_store(nrec_local,NGLLX), &
- hetar_store(nrec_local,NGLLY), &
- hgammar_store(nrec_local,NGLLZ),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating receiver interpolators')
- ! define local to global receiver numbering mapping
- allocate(number_receiver_global(nrec_local),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating global receiver numbering')
- ! define and store Lagrange interpolators at all the receivers
- if (SIMULATION_TYPE == 2) then
- nadj_hprec_local = nrec_local
- else
- nadj_hprec_local = 1
- endif
- allocate(hpxir_store(nadj_hprec_local,NGLLX), &
- hpetar_store(nadj_hprec_local,NGLLY), &
- hpgammar_store(nadj_hprec_local,NGLLZ),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating derivative interpolators')
-
- ! stores interpolators for receiver positions
- call setup_sources_receivers_intp(NSOURCES,myrank, &
- islice_selected_source, &
- xi_source,eta_source,gamma_source, &
- xigll,yigll,zigll, &
- SIMULATION_TYPE,nrec,nrec_local, &
- islice_selected_rec,number_receiver_global, &
- xi_receiver,eta_receiver,gamma_receiver, &
- hxir_store,hetar_store,hgammar_store, &
- nadj_hprec_local,hpxir_store,hpetar_store,hpgammar_store)
-
- ! allocate seismogram array
- if (SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) then
- allocate(seismograms(NDIM,nrec_local,NTSTEP_BETWEEN_OUTPUT_SEISMOS),stat=ier)
- if(ier /= 0) stop 'error while allocating seismograms'
- else
- allocate(seismograms(NDIM*NDIM,nrec_local,NTSTEP_BETWEEN_OUTPUT_SEISMOS),stat=ier)
- if(ier /= 0) stop 'error while allocating seismograms'
- ! allocate Frechet derivatives array
- allocate(moment_der(NDIM,NDIM,nrec_local),sloc_der(NDIM,nrec_local), &
- stshift_der(nrec_local),shdur_der(nrec_local),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating frechet derivatives arrays')
-
- moment_der(:,:,:) = 0._CUSTOM_REAL
- sloc_der(:,:) = 0._CUSTOM_REAL
- stshift_der(:) = 0._CUSTOM_REAL
- shdur_der(:) = 0._CUSTOM_REAL
-
- endif
- ! initialize seismograms
- seismograms(:,:,:) = 0._CUSTOM_REAL
- nit_written = 0
- else
- ! allocate dummy array since we need it to pass as argument e.g. in write_seismograms() routine
- ! note: nrec_local is zero, fortran 90/95 should allow zero-sized array allocation...
- allocate(seismograms(NDIM,nrec_local,NTSTEP_BETWEEN_OUTPUT_SEISMOS),stat=ier)
- if( ier /= 0) stop 'error while allocating zero seismograms'
- allocate(number_receiver_global(nrec_local),stat=ier)
- if( ier /= 0) stop 'error while allocating zero number_receiver_global'
- endif
-
- ! get information about event name and location for SAC seismograms
-
- ! The following line is added for get_event_info subroutine.
- ! Because the way NSOURCES_SAC was declared has been changed.
- ! The rest of the changes in this program is just the updates of the subroutines that
- ! I did changes, e.g., adding/removing parameters. by Ebru Bozdag
- call get_event_info_parallel(myrank,yr_SAC,jda_SAC,ho_SAC,mi_SAC,sec_SAC,&
- event_name_SAC,t_cmt_SAC,t_shift_SAC, &
- elat_SAC,elon_SAC,depth_SAC,mb_SAC,cmt_lat_SAC,&
- cmt_lon_SAC,cmt_depth_SAC,cmt_hdur_SAC,NSOURCES)
-
-!
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!
-
- ! user output
- if(myrank == 0) then
-
- write(IMAIN,*)
- write(IMAIN,*) 'Reference radius of the Earth used is ',R_EARTH_KM,' km'
- write(IMAIN,*)
-
- write(IMAIN,*)
- if(OCEANS_VAL) then
- write(IMAIN,*) 'incorporating the oceans using equivalent load'
- else
- write(IMAIN,*) 'no oceans'
- endif
-
- write(IMAIN,*)
- if(ELLIPTICITY_VAL) then
- write(IMAIN,*) 'incorporating ellipticity'
- else
- write(IMAIN,*) 'no ellipticity'
- endif
-
- write(IMAIN,*)
- if(TOPOGRAPHY) then
- write(IMAIN,*) 'incorporating surface topography'
- else
- write(IMAIN,*) 'no surface topography'
- endif
-
- write(IMAIN,*)
- if(GRAVITY_VAL) then
- write(IMAIN,*) 'incorporating self-gravitation (Cowling approximation)'
- else
- write(IMAIN,*) 'no self-gravitation'
- endif
-
- write(IMAIN,*)
- if(ROTATION_VAL) then
- write(IMAIN,*) 'incorporating rotation'
- else
- write(IMAIN,*) 'no rotation'
- endif
-
- write(IMAIN,*)
- if(ATTENUATION_VAL) then
- write(IMAIN,*) 'incorporating attenuation using ',N_SLS,' standard linear solids'
-
- if(ATTENUATION_3D_VAL) write(IMAIN,*) 'using 3D attenuation'
-
- if(USE_ATTENUATION_MIMIC ) write(IMAIN,*) 'mimicking effects on velocity only'
- else
- write(IMAIN,*) 'no attenuation'
- endif
-
- write(IMAIN,*)
- write(IMAIN,*)
- write(IMAIN,*)
-
- endif
-
- ! the mass matrix needs to be assembled with MPI here once and for all
- call prepare_timerun_rmass(myrank,rmass_ocean_load,rmass_crust_mantle, &
- rmass_outer_core,rmass_inner_core, &
- iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle, &
- iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_outer_core,iboolright_xi_outer_core, &
- iboolleft_eta_outer_core,iboolright_eta_outer_core, &
- npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
- iboolfaces_outer_core,iboolcorner_outer_core, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core, &
- iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces,imsg_type, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces, &
- buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
- NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
- NGLOB1D_RADIAL,NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX,NGLOB2DMAX_XY,npoin2D_max_all_CM_IC)
-
- ! mass matrix including central cube
- if(INCLUDE_CENTRAL_CUBE) then
-
- if(myrank == 0) write(IMAIN,*) 'including central cube'
-
- ! compute number of messages to expect in cube as well as their size
- call comp_central_cube_buffer_size(iproc_xi,iproc_eta,ichunk, &
- NPROC_XI_VAL,NPROC_ETA_VAL,NSPEC2D_BOTTOM(IREGION_INNER_CORE), &
- nb_msgs_theor_in_cube,npoin2D_cube_from_slices)
-
- ! this value is used for dynamic memory allocation, therefore make sure it is never zero
- if(nb_msgs_theor_in_cube > 0) then
- non_zero_nb_msgs_theor_in_cube = nb_msgs_theor_in_cube
- else
- non_zero_nb_msgs_theor_in_cube = 1
- endif
-
- ! allocate buffers for cube and slices
- allocate(sender_from_slices_to_cube(non_zero_nb_msgs_theor_in_cube), &
- buffer_all_cube_from_slices(non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices,NDIM), &
- b_buffer_all_cube_from_slices(non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices,NDIM), &
- buffer_slices(npoin2D_cube_from_slices,NDIM), &
- b_buffer_slices(npoin2D_cube_from_slices,NDIM), &
- buffer_slices2(npoin2D_cube_from_slices,NDIM), &
- ibool_central_cube(non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating cube buffers')
-
- ! handles the communications with the central cube if it was included in the mesh
- call prepare_timerun_centralcube(myrank,rmass_inner_core, &
- iproc_xi,iproc_eta,ichunk, &
- NSPEC2DMAX_XMIN_XMAX,NSPEC2DMAX_YMIN_YMAX,NSPEC2D_BOTTOM, &
- addressing,ibool_inner_core,idoubling_inner_core, &
- xstore_inner_core,ystore_inner_core,zstore_inner_core, &
- nspec2D_xmin_inner_core,nspec2D_xmax_inner_core, &
- nspec2D_ymin_inner_core,nspec2D_ymax_inner_core, &
- ibelm_xmin_inner_core,ibelm_xmax_inner_core, &
- ibelm_ymin_inner_core,ibelm_ymax_inner_core,ibelm_bottom_inner_core, &
- nb_msgs_theor_in_cube,non_zero_nb_msgs_theor_in_cube, &
- npoin2D_cube_from_slices,receiver_cube_from_slices, &
- sender_from_slices_to_cube,ibool_central_cube, &
- buffer_slices,buffer_slices2,buffer_all_cube_from_slices)
-
- call fix_non_blocking_central_cube(is_on_a_slice_edge_inner_core, &
- ibool_inner_core,NSPEC_INNER_CORE,NGLOB_INNER_CORE,nb_msgs_theor_in_cube,ibelm_bottom_inner_core, &
- idoubling_inner_core,npoin2D_cube_from_slices,ibool_central_cube, &
- NSPEC2D_BOTTOM(IREGION_INNER_CORE),ichunk)
-
-
- else
-
- ! allocate fictitious buffers for cube and slices with a dummy size
- ! just to be able to use them as arguments in subroutine calls
- allocate(sender_from_slices_to_cube(1), &
- buffer_all_cube_from_slices(1,1,1), &
- b_buffer_all_cube_from_slices(1,1,1), &
- buffer_slices(1,1), &
- b_buffer_slices(1,1), &
- buffer_slices2(1,1), &
- ibool_central_cube(1,1),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating dummy buffers')
-
- endif
-
- ! check that all the mass matrices are positive
- if(OCEANS_VAL) then
- if(minval(rmass_ocean_load) <= 0.) call exit_MPI(myrank,'negative mass matrix term for the oceans')
- endif
- if(minval(rmass_crust_mantle) <= 0.) call exit_MPI(myrank,'negative mass matrix term for the crust_mantle')
- if(minval(rmass_inner_core) <= 0.) call exit_MPI(myrank,'negative mass matrix term for the inner core')
- if(minval(rmass_outer_core) <= 0.) call exit_MPI(myrank,'negative mass matrix term for the outer core')
-
- ! for efficiency, invert final mass matrix once and for all on each slice
- if(OCEANS_VAL) rmass_ocean_load = 1._CUSTOM_REAL / rmass_ocean_load
- rmass_crust_mantle = 1._CUSTOM_REAL / rmass_crust_mantle
- rmass_outer_core = 1._CUSTOM_REAL / rmass_outer_core
- rmass_inner_core = 1._CUSTOM_REAL / rmass_inner_core
-
-
- ! change x, y, z to r, theta and phi once and for all
- ! IMPROVE dangerous: old name kept (xstore ystore zstore) for new values
-
- ! convert in the crust and mantle
- do i = 1,NGLOB_CRUST_MANTLE
- call xyz_2_rthetaphi(xstore_crust_mantle(i), &
- ystore_crust_mantle(i), &
- zstore_crust_mantle(i),rval,thetaval,phival)
- xstore_crust_mantle(i) = rval
- ystore_crust_mantle(i) = thetaval
- zstore_crust_mantle(i) = phival
- enddo
-
- ! convert in the outer core
- do i = 1,NGLOB_OUTER_CORE
- call xyz_2_rthetaphi(xstore_outer_core(i), &
- ystore_outer_core(i), &
- zstore_outer_core(i),rval,thetaval,phival)
- xstore_outer_core(i) = rval
- ystore_outer_core(i) = thetaval
- zstore_outer_core(i) = phival
- enddo
-
- ! convert in the inner core
- do i = 1,NGLOB_INNER_CORE
- call xyz_2_rthetaphi(xstore_inner_core(i), &
- ystore_inner_core(i), &
- zstore_inner_core(i),rval,thetaval,phival)
- xstore_inner_core(i) = rval
- ystore_inner_core(i) = thetaval
- zstore_inner_core(i) = phival
- enddo
-
- ! allocate files to save movies
- if(MOVIE_SURFACE .or. NOISE_TOMOGRAPHY /=0) then ! for noise tomography, store_val_x/y/z/ux/uy/uz needed for 'surface movie'
- if(MOVIE_COARSE .and. NOISE_TOMOGRAPHY ==0) then ! only output corners !for noise tomography, must NOT be coarse
- nmovie_points = 2 * 2 * NSPEC2D_TOP(IREGION_CRUST_MANTLE)
- if(NGLLX /= NGLLY) &
- call exit_MPI(myrank,'MOVIE_COARSE together with MOVIE_SURFACE requires NGLLX=NGLLY')
- NIT = NGLLX - 1
- else
- nmovie_points = NGLLX * NGLLY * NSPEC2D_TOP(IREGION_CRUST_MANTLE)
- NIT = 1
- endif
- allocate(store_val_x(nmovie_points), &
- store_val_y(nmovie_points), &
- store_val_z(nmovie_points), &
- store_val_ux(nmovie_points), &
- store_val_uy(nmovie_points), &
- store_val_uz(nmovie_points),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating movie surface arrays')
-
- if (MOVIE_SURFACE) then ! those arrays are not neccessary for noise tomography, so only allocate them in MOVIE_SURFACE case
- allocate(store_val_x_all(nmovie_points,0:NPROCTOT_VAL-1), &
- store_val_y_all(nmovie_points,0:NPROCTOT_VAL-1), &
- store_val_z_all(nmovie_points,0:NPROCTOT_VAL-1), &
- store_val_ux_all(nmovie_points,0:NPROCTOT_VAL-1), &
- store_val_uy_all(nmovie_points,0:NPROCTOT_VAL-1), &
- store_val_uz_all(nmovie_points,0:NPROCTOT_VAL-1),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating movie surface all arrays')
- endif
- if(myrank == 0) then
- write(IMAIN,*)
- write(IMAIN,*) 'Movie surface:'
- write(IMAIN,*) ' Writing to moviedata*** files in output directory'
- if(MOVIE_VOLUME_TYPE == 5) then
- write(IMAIN,*) ' movie output: displacement'
- else
- write(IMAIN,*) ' movie output: velocity'
- endif
- write(IMAIN,*) ' time steps every: ',NTSTEP_BETWEEN_FRAMES
- endif
- endif
-
-
- ! output point and element information for 3D movies
- if(MOVIE_VOLUME) then
- ! the following has to be true for the the array dimensions of eps to match with those of xstore etc..
- ! note that epsilondev and eps_trace_over_3 don't have the same dimensions.. could cause trouble
- if (NSPEC_CRUST_MANTLE_STR_OR_ATT /= NSPEC_CRUST_MANTLE) &
- stop 'NSPEC_CRUST_MANTLE_STRAINS_ATT /= NSPEC_CRUST_MANTLE'
- if (NSPEC_CRUST_MANTLE_STRAIN_ONLY /= NSPEC_CRUST_MANTLE) &
- stop 'NSPEC_CRUST_MANTLE_STRAIN_ONLY /= NSPEC_CRUST_MANTLE'
-
- write(prname,'(a,i6.6,a)') trim(LOCAL_PATH)//'/'//'proc',myrank,'_'
- call count_points_movie_volume(prname,ibool_crust_mantle, xstore_crust_mantle,ystore_crust_mantle, &
- zstore_crust_mantle,MOVIE_TOP,MOVIE_BOTTOM,MOVIE_WEST,MOVIE_EAST,MOVIE_NORTH,MOVIE_SOUTH, &
- MOVIE_COARSE,npoints_3dmovie,nspecel_3dmovie,num_ibool_3dmovie,mask_ibool,mask_3dmovie)
-
-
- allocate(nu_3dmovie(3,3,npoints_3dmovie),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating nu for 3d movie')
-
- call write_movie_volume_mesh(npoints_3dmovie,prname,ibool_crust_mantle,xstore_crust_mantle, &
- ystore_crust_mantle,zstore_crust_mantle, muvstore_crust_mantle_3dmovie, &
- mask_3dmovie,mask_ibool,num_ibool_3dmovie,nu_3dmovie,MOVIE_COARSE)
-
- if(myrank == 0) then
- write(IMAIN,*)
- write(IMAIN,*) 'Movie volume:'
- write(IMAIN,*) ' Writing to movie3D*** files on local disk databases directory'
- if(MOVIE_VOLUME_TYPE == 1) then
- write(IMAIN,*) ' movie output: strain'
- else if(MOVIE_VOLUME_TYPE == 2) then
- write(IMAIN,*) ' movie output: time integral of strain'
- else if(MOVIE_VOLUME_TYPE == 3) then
- write(IMAIN,*) ' movie output: potency or integral of strain'
- else if(MOVIE_VOLUME_TYPE == 4) then
- write(IMAIN,*) ' movie output: divergence and curl'
- else if(MOVIE_VOLUME_TYPE == 5) then
- write(IMAIN,*) ' movie output: displacement'
- else if(MOVIE_VOLUME_TYPE == 6) then
- write(IMAIN,*) ' movie output: velocity'
- endif
- write(IMAIN,*) ' depth(T,B):',MOVIE_TOP,MOVIE_BOTTOM
- write(IMAIN,*) ' lon(W,E) :',MOVIE_WEST,MOVIE_EAST
- write(IMAIN,*) ' lat(S,N) :',MOVIE_SOUTH,MOVIE_NORTH
- write(IMAIN,*) ' Starting at time step:',MOVIE_START, 'ending at:',MOVIE_STOP,'every: ',NTSTEP_BETWEEN_FRAMES
- endif
-
- if( MOVIE_VOLUME_TYPE < 1 .or. MOVIE_VOLUME_TYPE > 6) &
- call exit_MPI(myrank, 'MOVIE_VOLUME_TYPE has to be 1,2,3,4,5 or 6')
-
- endif ! MOVIE_VOLUME
-
- ! sets up time increments and rotation constants
- call prepare_timerun_constants(myrank,NSTEP, &
- DT,t0,scale_t,scale_t_inv,scale_displ,scale_veloc, &
- deltat,deltatover2,deltatsqover2, &
- b_deltat,b_deltatover2,b_deltatsqover2, &
- two_omega_earth,A_array_rotation,B_array_rotation, &
- b_two_omega_earth, SIMULATION_TYPE)
-
- ! precomputes gravity factors
- call prepare_timerun_gravity(myrank, &
- minus_g_cmb,minus_g_icb, &
- minus_gravity_table,minus_deriv_gravity_table, &
- density_table,d_ln_density_dr_table,minus_rho_g_over_kappa_fluid, &
- ONE_CRUST,RICB,RCMB,RTOPDDOUBLEPRIME, &
- R600,R670,R220,R771,R400,R80,RMOHO,RMIDDLE_CRUST,ROCEAN)
-
- ! precomputes attenuation factors
- if(ATTENUATION_VAL) then
- call prepare_timerun_attenuation(myrank, &
- factor_scale_crust_mantle,one_minus_sum_beta_crust_mantle,factor_common_crust_mantle, &
- factor_scale_inner_core,one_minus_sum_beta_inner_core,factor_common_inner_core, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle, &
- c22store_crust_mantle,c23store_crust_mantle, &
- c33store_crust_mantle,c44store_crust_mantle, &
- c55store_crust_mantle,c66store_crust_mantle, &
- muvstore_crust_mantle,muhstore_crust_mantle,ispec_is_tiso_crust_mantle, &
- muvstore_inner_core, &
- SIMULATION_TYPE,MOVIE_VOLUME,muvstore_crust_mantle_3dmovie, &
- c11store_inner_core,c12store_inner_core,c13store_inner_core, &
- c33store_inner_core,c44store_inner_core, &
- alphaval,betaval,gammaval,b_alphaval,b_betaval,b_gammaval, &
- deltat,b_deltat,LOCAL_PATH)
- endif
-
- if(myrank == 0) then
-
- write(IMAIN,*) 'for overlapping of communications with calculations:'
- write(IMAIN,*)
-
- percentage_edge = 100.*count(is_on_a_slice_edge_crust_mantle(:))/real(NSPEC_CRUST_MANTLE)
- write(IMAIN,*) 'percentage of edge elements in crust/mantle ',percentage_edge,'%'
- write(IMAIN,*) 'percentage of volume elements in crust/mantle ',100. - percentage_edge,'%'
- write(IMAIN,*)
-
- percentage_edge = 100.*count(is_on_a_slice_edge_outer_core(:))/real(NSPEC_OUTER_CORE)
- write(IMAIN,*) 'percentage of edge elements in outer core ',percentage_edge,'%'
- write(IMAIN,*) 'percentage of volume elements in outer core ',100. - percentage_edge,'%'
- write(IMAIN,*)
-
- percentage_edge = 100.*count(is_on_a_slice_edge_inner_core(:))/real(NSPEC_INNER_CORE)
- write(IMAIN,*) 'percentage of edge elements in inner core ',percentage_edge,'%'
- write(IMAIN,*) 'percentage of volume elements in inner core ',100. - percentage_edge,'%'
- write(IMAIN,*)
-
- endif
-
- if(.not. USE_NONBLOCKING_COMMS) then
- is_on_a_slice_edge_crust_mantle(:) = .true.
- is_on_a_slice_edge_outer_core(:) = .true.
- is_on_a_slice_edge_inner_core(:) = .true.
- endif
-
- ! initialize arrays to zero
- displ_crust_mantle(:,:) = 0._CUSTOM_REAL
- veloc_crust_mantle(:,:) = 0._CUSTOM_REAL
- accel_crust_mantle(:,:) = 0._CUSTOM_REAL
-
- displ_outer_core(:) = 0._CUSTOM_REAL
- veloc_outer_core(:) = 0._CUSTOM_REAL
- accel_outer_core(:) = 0._CUSTOM_REAL
-
- displ_inner_core(:,:) = 0._CUSTOM_REAL
- veloc_inner_core(:,:) = 0._CUSTOM_REAL
- accel_inner_core(:,:) = 0._CUSTOM_REAL
-
- ! put negligible initial value to avoid very slow underflow trapping
- if(FIX_UNDERFLOW_PROBLEM) then
- displ_crust_mantle(:,:) = VERYSMALLVAL
- displ_outer_core(:) = VERYSMALLVAL
- displ_inner_core(:,:) = VERYSMALLVAL
- endif
-
-! if doing benchmark runs to measure scaling of the code,
-! set the initial field to 1 to make sure gradual underflow trapping does not slow down the code
- if (DO_BENCHMARK_RUN_ONLY .and. SET_INITIAL_FIELD_TO_1_IN_BENCH) then
- displ_crust_mantle(:,:) = 1._CUSTOM_REAL
- veloc_crust_mantle(:,:) = 1._CUSTOM_REAL
- accel_crust_mantle(:,:) = 1._CUSTOM_REAL
-
- displ_outer_core(:) = 1._CUSTOM_REAL
- veloc_outer_core(:) = 1._CUSTOM_REAL
- accel_outer_core(:) = 1._CUSTOM_REAL
-
- displ_inner_core(:,:) = 1._CUSTOM_REAL
- veloc_inner_core(:,:) = 1._CUSTOM_REAL
- accel_inner_core(:,:) = 1._CUSTOM_REAL
- endif
-
- if (SIMULATION_TYPE == 3) then
- rho_kl_crust_mantle(:,:,:,:) = 0._CUSTOM_REAL
- beta_kl_crust_mantle(:,:,:,:) = 0._CUSTOM_REAL
- alpha_kl_crust_mantle(:,:,:,:) = 0._CUSTOM_REAL
- if (NOISE_TOMOGRAPHY == 3) Sigma_kl_crust_mantle(:,:,:,:) = 0._CUSTOM_REAL
-
- ! approximate hessian
- if( APPROXIMATE_HESS_KL ) then
- allocate( hess_kl_crust_mantle(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating hessian')
- hess_kl_crust_mantle(:,:,:,:) = 0._CUSTOM_REAL
- endif
-
- ! For anisotropic kernels (in crust_mantle only)
- cijkl_kl_crust_mantle(:,:,:,:,:) = 0._CUSTOM_REAL
-
- rho_kl_outer_core(:,:,:,:) = 0._CUSTOM_REAL
- alpha_kl_outer_core(:,:,:,:) = 0._CUSTOM_REAL
-
- rho_kl_inner_core(:,:,:,:) = 0._CUSTOM_REAL
- beta_kl_inner_core(:,:,:,:) = 0._CUSTOM_REAL
- alpha_kl_inner_core(:,:,:,:) = 0._CUSTOM_REAL
-
- div_displ_outer_core(:,:,:,:) = 0._CUSTOM_REAL
- b_div_displ_outer_core(:,:,:,:) = 0._CUSTOM_REAL
-
- ! deviatoric kernel check
- if( deviatoric_outercore) then
- nspec_beta_kl_outer_core = NSPEC_OUTER_CORE_ADJOINT
- else
- nspec_beta_kl_outer_core = 1
- endif
- allocate(beta_kl_outer_core(NGLLX,NGLLY,NGLLZ,nspec_beta_kl_outer_core),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating beta outercore')
- beta_kl_outer_core(:,:,:,:) = 0._CUSTOM_REAL
- endif
-
- ! initialize to be on the safe side for adjoint runs SIMULATION_TYPE==2
- eps_trace_over_3_crust_mantle(:,:,:,:) = 0._CUSTOM_REAL
- epsilondev_crust_mantle(:,:,:,:,:) = 0._CUSTOM_REAL
- eps_trace_over_3_inner_core(:,:,:,:) = 0._CUSTOM_REAL
- epsilondev_inner_core(:,:,:,:,:) = 0._CUSTOM_REAL
- if(FIX_UNDERFLOW_PROBLEM) then
- eps_trace_over_3_crust_mantle(:,:,:,:) = VERYSMALLVAL
- epsilondev_crust_mantle(:,:,:,:,:) = VERYSMALLVAL
- eps_trace_over_3_inner_core(:,:,:,:) = VERYSMALLVAL
- epsilondev_inner_core(:,:,:,:,:) = VERYSMALLVAL
- endif
-
- if (COMPUTE_AND_STORE_STRAIN) then
- if(MOVIE_VOLUME .and. (MOVIE_VOLUME_TYPE == 2 .or. MOVIE_VOLUME_TYPE == 3)) then
- Iepsilondev_crust_mantle(:,:,:,:,:) = 0._CUSTOM_REAL
- Ieps_trace_over_3_crust_mantle(:,:,:,:)=0._CUSTOM_REAL
- endif
- endif
-
- ! clear memory variables if attenuation
- if(ATTENUATION_VAL) then
- R_memory_crust_mantle(:,:,:,:,:,:) = 0._CUSTOM_REAL
- R_memory_inner_core(:,:,:,:,:,:) = 0._CUSTOM_REAL
- if(FIX_UNDERFLOW_PROBLEM) then
- R_memory_crust_mantle(:,:,:,:,:,:) = VERYSMALLVAL
- R_memory_inner_core(:,:,:,:,:,:) = VERYSMALLVAL
- endif
- endif
-
- ! reads files back from local disk or MT tape system if restart file
- ! note: for SIMULATION_TYPE 3 simulations, the stored wavefields
- ! will be read in the time loop after the Newmark time scheme update.
- ! this makes indexing and timing easier to match with adjoint wavefields indexing.
- call read_forward_arrays_startrun(myrank,NSTEP, &
- SIMULATION_TYPE,NUMBER_OF_RUNS,NUMBER_OF_THIS_RUN, &
- it_begin,it_end, &
- displ_crust_mantle,veloc_crust_mantle,accel_crust_mantle, &
- displ_inner_core,veloc_inner_core,accel_inner_core, &
- displ_outer_core,veloc_outer_core,accel_outer_core, &
- R_memory_crust_mantle,R_memory_inner_core, &
- epsilondev_crust_mantle,epsilondev_inner_core, &
- A_array_rotation,B_array_rotation, &
- b_displ_crust_mantle,b_veloc_crust_mantle,b_accel_crust_mantle, &
- b_displ_inner_core,b_veloc_inner_core,b_accel_inner_core, &
- b_displ_outer_core,b_veloc_outer_core,b_accel_outer_core, &
- b_R_memory_crust_mantle,b_R_memory_inner_core, &
- b_epsilondev_crust_mantle,b_epsilondev_inner_core, &
- b_A_array_rotation,b_B_array_rotation,LOCAL_PATH)
-
-!<YANGL
- ! NOISE TOMOGRAPHY
- if ( NOISE_TOMOGRAPHY /= 0 ) then
- allocate(noise_sourcearray(NDIM,NGLLX,NGLLY,NGLLZ,NSTEP), &
- normal_x_noise(nmovie_points), &
- normal_y_noise(nmovie_points), &
- normal_z_noise(nmovie_points), &
- mask_noise(nmovie_points), &
- noise_surface_movie(NDIM,NGLLX,NGLLY,NSPEC2D_TOP(IREGION_CRUST_MANTLE)),stat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error allocating noise arrays')
-
- noise_sourcearray(:,:,:,:,:) = 0._CUSTOM_REAL
- normal_x_noise(:) = 0._CUSTOM_REAL
- normal_y_noise(:) = 0._CUSTOM_REAL
- normal_z_noise(:) = 0._CUSTOM_REAL
- mask_noise(:) = 0._CUSTOM_REAL
- noise_surface_movie(:,:,:,:) = 0._CUSTOM_REAL
-
- call read_parameters_noise(myrank,nrec,NSTEP,nmovie_points, &
- islice_selected_rec,xi_receiver,eta_receiver,gamma_receiver,nu, &
- noise_sourcearray,xigll,yigll,zigll,NSPEC2D_TOP(IREGION_CRUST_MANTLE), &
- NIT, ibool_crust_mantle, ibelm_top_crust_mantle, &
- xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
- irec_master_noise,normal_x_noise,normal_y_noise,normal_z_noise,mask_noise)
-
- call check_parameters_noise(myrank,NOISE_TOMOGRAPHY,SIMULATION_TYPE,SAVE_FORWARD, &
- NUMBER_OF_RUNS, NUMBER_OF_THIS_RUN,ROTATE_SEISMOGRAMS_RT, &
- SAVE_ALL_SEISMOS_IN_ONE_FILE, USE_BINARY_FOR_LARGE_FILE, &
- MOVIE_COARSE,LOCAL_PATH,NSPEC2D_TOP(IREGION_CRUST_MANTLE),NSTEP)
- endif
-!>YANGL
-
-!
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!
-
-!
-! s t a r t t i m e i t e r a t i o n s
-!
-
-! synchronize all processes to make sure everybody is ready to start time loop
- call MPI_BARRIER(MPI_COMM_WORLD,ier)
- if(myrank == 0) write(IMAIN,*) 'All processes are synchronized before time loop'
-
- if(myrank == 0) then
- write(IMAIN,*)
- write(IMAIN,*) 'Starting time iteration loop...'
- write(IMAIN,*)
- endif
-
-! create an empty file to monitor the start of the simulation
- if(myrank == 0) then
- open(unit=IOUT,file=trim(OUTPUT_FILES)//'/starttimeloop.txt',status='unknown',action='write')
- write(IOUT,*) 'hello, starting time loop'
- close(IOUT)
- endif
-
-! initialize variables for writing seismograms
- seismo_offset = it_begin-1
- seismo_current = 0
-
-#ifdef _HANDOPT
- imodulo_NGLOB_CRUST_MANTLE = mod(NGLOB_CRUST_MANTLE,3)
- imodulo_NGLOB_CRUST_MANTLE4 = mod(NGLOB_CRUST_MANTLE,4)
- imodulo_NGLOB_INNER_CORE = mod(NGLOB_INNER_CORE,3)
-#endif
-
-! get MPI starting time
- time_start = MPI_WTIME()
-
-! *********************************************************
-! ************* MAIN LOOP OVER THE TIME STEPS *************
-! *********************************************************
-
- do it = it_begin,it_end
-
- ! update position in seismograms
- seismo_current = seismo_current + 1
-
- ! Newmark time scheme update
-#ifdef _HANDOPT
-! way 2:
-! One common technique in computational science to help enhance pipelining is loop unrolling
-!
-! we're accessing NDIM=3 components at each line,
-! that is, for an iteration, the register must contain
-! NDIM * displ_ + NDIM * veloc_ + NDIM * accel + deltat + deltatsq..
-! in most cases a real (CUSTOM_REAL) value will have 4 bytes,
-! assuming a default cache size of about 128 bytes, we unroll here in steps of 3, thus 29 reals or 118 bytes,
-! rather than with steps of 4
- ! mantle
- if(imodulo_NGLOB_CRUST_MANTLE >= 1) then
- do i = 1,imodulo_NGLOB_CRUST_MANTLE
- displ_crust_mantle(:,i) = displ_crust_mantle(:,i) &
- + deltat*veloc_crust_mantle(:,i) + deltatsqover2*accel_crust_mantle(:,i)
-
- veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) &
- + deltatover2*accel_crust_mantle(:,i)
-
- accel_crust_mantle(:,i) = 0._CUSTOM_REAL
- enddo
- endif
- do i = imodulo_NGLOB_CRUST_MANTLE+1,NGLOB_CRUST_MANTLE, 3 ! in steps of 3
- displ_crust_mantle(:,i) = displ_crust_mantle(:,i) &
- + deltat*veloc_crust_mantle(:,i) + deltatsqover2*accel_crust_mantle(:,i)
- displ_crust_mantle(:,i+1) = displ_crust_mantle(:,i+1) &
- + deltat*veloc_crust_mantle(:,i+1) + deltatsqover2*accel_crust_mantle(:,i+1)
- displ_crust_mantle(:,i+2) = displ_crust_mantle(:,i+2) &
- + deltat*veloc_crust_mantle(:,i+2) + deltatsqover2*accel_crust_mantle(:,i+2)
-
-
- veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) &
- + deltatover2*accel_crust_mantle(:,i)
- veloc_crust_mantle(:,i+1) = veloc_crust_mantle(:,i+1) &
- + deltatover2*accel_crust_mantle(:,i+1)
- veloc_crust_mantle(:,i+2) = veloc_crust_mantle(:,i+2) &
- + deltatover2*accel_crust_mantle(:,i+2)
-
- ! set acceleration to zero
- ! note: we do initialize acceleration in this loop since it is read already into the cache,
- ! otherwise it would have to be read in again for this explicitly,
- ! which would make this step more expensive
- accel_crust_mantle(:,i) = 0._CUSTOM_REAL
- accel_crust_mantle(:,i+1) = 0._CUSTOM_REAL
- accel_crust_mantle(:,i+2) = 0._CUSTOM_REAL
- enddo
-
- ! outer core
- do i=1,NGLOB_OUTER_CORE
- displ_outer_core(i) = displ_outer_core(i) &
- + deltat*veloc_outer_core(i) + deltatsqover2*accel_outer_core(i)
-
- veloc_outer_core(i) = veloc_outer_core(i) &
- + deltatover2*accel_outer_core(i)
-
- accel_outer_core(i) = 0._CUSTOM_REAL
- enddo
-
- ! inner core
- if(imodulo_NGLOB_INNER_CORE >= 1) then
- do i = 1,imodulo_NGLOB_INNER_CORE
- displ_inner_core(:,i) = displ_inner_core(:,i) &
- + deltat*veloc_inner_core(:,i) + deltatsqover2*accel_inner_core(:,i)
-
- veloc_inner_core(:,i) = veloc_inner_core(:,i) &
- + deltatover2*accel_inner_core(:,i)
-
- accel_inner_core(:,i) = 0._CUSTOM_REAL
- enddo
- endif
- do i = imodulo_NGLOB_INNER_CORE+1,NGLOB_INNER_CORE, 3 ! in steps of 3
- displ_inner_core(:,i) = displ_inner_core(:,i) &
- + deltat*veloc_inner_core(:,i) + deltatsqover2*accel_inner_core(:,i)
- displ_inner_core(:,i+1) = displ_inner_core(:,i+1) &
- + deltat*veloc_inner_core(:,i+1) + deltatsqover2*accel_inner_core(:,i+1)
- displ_inner_core(:,i+2) = displ_inner_core(:,i+2) &
- + deltat*veloc_inner_core(:,i+2) + deltatsqover2*accel_inner_core(:,i+2)
-
-
- veloc_inner_core(:,i) = veloc_inner_core(:,i) &
- + deltatover2*accel_inner_core(:,i)
- veloc_inner_core(:,i+1) = veloc_inner_core(:,i+1) &
- + deltatover2*accel_inner_core(:,i+1)
- veloc_inner_core(:,i+2) = veloc_inner_core(:,i+2) &
- + deltatover2*accel_inner_core(:,i+2)
-
- accel_inner_core(:,i) = 0._CUSTOM_REAL
- accel_inner_core(:,i+1) = 0._CUSTOM_REAL
- accel_inner_core(:,i+2) = 0._CUSTOM_REAL
- enddo
-
-#else
-! way 1:
- ! mantle
- do i=1,NGLOB_CRUST_MANTLE
- displ_crust_mantle(:,i) = displ_crust_mantle(:,i) &
- + deltat*veloc_crust_mantle(:,i) + deltatsqover2*accel_crust_mantle(:,i)
- veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) &
- + deltatover2*accel_crust_mantle(:,i)
- accel_crust_mantle(:,i) = 0._CUSTOM_REAL
- enddo
- ! outer core
- do i=1,NGLOB_OUTER_CORE
- displ_outer_core(i) = displ_outer_core(i) &
- + deltat*veloc_outer_core(i) + deltatsqover2*accel_outer_core(i)
- veloc_outer_core(i) = veloc_outer_core(i) &
- + deltatover2*accel_outer_core(i)
- accel_outer_core(i) = 0._CUSTOM_REAL
- enddo
- ! inner core
- do i=1,NGLOB_INNER_CORE
- displ_inner_core(:,i) = displ_inner_core(:,i) &
- + deltat*veloc_inner_core(:,i) + deltatsqover2*accel_inner_core(:,i)
- veloc_inner_core(:,i) = veloc_inner_core(:,i) &
- + deltatover2*accel_inner_core(:,i)
- accel_inner_core(:,i) = 0._CUSTOM_REAL
- enddo
-#endif
-
-
-
-
- ! backward field
- if (SIMULATION_TYPE == 3) then
-
-#ifdef _HANDOPT
-! way 2:
- ! mantle
- if(imodulo_NGLOB_CRUST_MANTLE >= 1) then
- do i=1,imodulo_NGLOB_CRUST_MANTLE
- b_displ_crust_mantle(:,i) = b_displ_crust_mantle(:,i) &
- + b_deltat*b_veloc_crust_mantle(:,i) + b_deltatsqover2*b_accel_crust_mantle(:,i)
- b_veloc_crust_mantle(:,i) = b_veloc_crust_mantle(:,i) &
- + b_deltatover2*b_accel_crust_mantle(:,i)
- b_accel_crust_mantle(:,i) = 0._CUSTOM_REAL
- enddo
- endif
- do i=imodulo_NGLOB_CRUST_MANTLE+1,NGLOB_CRUST_MANTLE,3
- b_displ_crust_mantle(:,i) = b_displ_crust_mantle(:,i) &
- + b_deltat*b_veloc_crust_mantle(:,i) + b_deltatsqover2*b_accel_crust_mantle(:,i)
- b_displ_crust_mantle(:,i+1) = b_displ_crust_mantle(:,i+1) &
- + b_deltat*b_veloc_crust_mantle(:,i+1) + b_deltatsqover2*b_accel_crust_mantle(:,i+1)
- b_displ_crust_mantle(:,i+2) = b_displ_crust_mantle(:,i+2) &
- + b_deltat*b_veloc_crust_mantle(:,i+2) + b_deltatsqover2*b_accel_crust_mantle(:,i+2)
-
-
- b_veloc_crust_mantle(:,i) = b_veloc_crust_mantle(:,i) &
- + b_deltatover2*b_accel_crust_mantle(:,i)
- b_veloc_crust_mantle(:,i+1) = b_veloc_crust_mantle(:,i+1) &
- + b_deltatover2*b_accel_crust_mantle(:,i+1)
- b_veloc_crust_mantle(:,i+2) = b_veloc_crust_mantle(:,i+2) &
- + b_deltatover2*b_accel_crust_mantle(:,i+2)
-
- b_accel_crust_mantle(:,i) = 0._CUSTOM_REAL
- b_accel_crust_mantle(:,i+1) = 0._CUSTOM_REAL
- b_accel_crust_mantle(:,i+2) = 0._CUSTOM_REAL
- enddo
-
- ! outer core
- do i=1,NGLOB_OUTER_CORE
- b_displ_outer_core(i) = b_displ_outer_core(i) &
- + b_deltat*b_veloc_outer_core(i) + b_deltatsqover2*b_accel_outer_core(i)
- b_veloc_outer_core(i) = b_veloc_outer_core(i) &
- + b_deltatover2*b_accel_outer_core(i)
- b_accel_outer_core(i) = 0._CUSTOM_REAL
- enddo
-
- ! inner core
- if(imodulo_NGLOB_INNER_CORE >= 1) then
- do i=1,imodulo_NGLOB_INNER_CORE
- b_displ_inner_core(:,i) = b_displ_inner_core(:,i) &
- + b_deltat*b_veloc_inner_core(:,i) + b_deltatsqover2*b_accel_inner_core(:,i)
- b_veloc_inner_core(:,i) = b_veloc_inner_core(:,i) &
- + b_deltatover2*b_accel_inner_core(:,i)
- b_accel_inner_core(:,i) = 0._CUSTOM_REAL
- enddo
- endif
- do i=imodulo_NGLOB_INNER_CORE+1,NGLOB_INNER_CORE,3
- b_displ_inner_core(:,i) = b_displ_inner_core(:,i) &
- + b_deltat*b_veloc_inner_core(:,i) + b_deltatsqover2*b_accel_inner_core(:,i)
- b_displ_inner_core(:,i+1) = b_displ_inner_core(:,i+1) &
- + b_deltat*b_veloc_inner_core(:,i+1) + b_deltatsqover2*b_accel_inner_core(:,i+1)
- b_displ_inner_core(:,i+2) = b_displ_inner_core(:,i+2) &
- + b_deltat*b_veloc_inner_core(:,i+2) + b_deltatsqover2*b_accel_inner_core(:,i+2)
-
- b_veloc_inner_core(:,i) = b_veloc_inner_core(:,i) &
- + b_deltatover2*b_accel_inner_core(:,i)
- b_veloc_inner_core(:,i+1) = b_veloc_inner_core(:,i+1) &
- + b_deltatover2*b_accel_inner_core(:,i+1)
- b_veloc_inner_core(:,i+2) = b_veloc_inner_core(:,i+2) &
- + b_deltatover2*b_accel_inner_core(:,i+2)
-
- b_accel_inner_core(:,i) = 0._CUSTOM_REAL
- b_accel_inner_core(:,i+1) = 0._CUSTOM_REAL
- b_accel_inner_core(:,i+2) = 0._CUSTOM_REAL
- enddo
-#else
-! way 1:
- ! mantle
- do i=1,NGLOB_CRUST_MANTLE
- b_displ_crust_mantle(:,i) = b_displ_crust_mantle(:,i) &
- + b_deltat*b_veloc_crust_mantle(:,i) + b_deltatsqover2*b_accel_crust_mantle(:,i)
- b_veloc_crust_mantle(:,i) = b_veloc_crust_mantle(:,i) &
- + b_deltatover2*b_accel_crust_mantle(:,i)
- b_accel_crust_mantle(:,i) = 0._CUSTOM_REAL
- enddo
- ! outer core
- do i=1,NGLOB_OUTER_CORE
- b_displ_outer_core(i) = b_displ_outer_core(i) &
- + b_deltat*b_veloc_outer_core(i) + b_deltatsqover2*b_accel_outer_core(i)
- b_veloc_outer_core(i) = b_veloc_outer_core(i) &
- + b_deltatover2*b_accel_outer_core(i)
- b_accel_outer_core(i) = 0._CUSTOM_REAL
- enddo
- ! inner core
- do i=1,NGLOB_INNER_CORE
- b_displ_inner_core(:,i) = b_displ_inner_core(:,i) &
- + b_deltat*b_veloc_inner_core(:,i) + b_deltatsqover2*b_accel_inner_core(:,i)
- b_veloc_inner_core(:,i) = b_veloc_inner_core(:,i) &
- + b_deltatover2*b_accel_inner_core(:,i)
- b_accel_inner_core(:,i) = 0._CUSTOM_REAL
- enddo
-#endif
- endif ! SIMULATION_TYPE == 3
-
- ! integral of strain for adjoint movie volume
- if(MOVIE_VOLUME .and. (MOVIE_VOLUME_TYPE == 2 .or. MOVIE_VOLUME_TYPE == 3) ) then
- Iepsilondev_crust_mantle(:,:,:,:,:) = Iepsilondev_crust_mantle(:,:,:,:,:) &
- + deltat*epsilondev_crust_mantle(:,:,:,:,:)
- Ieps_trace_over_3_crust_mantle(:,:,:,:) = Ieps_trace_over_3_crust_mantle(:,:,:,:) &
- + deltat*eps_trace_over_3_crust_mantle(:,:,:,:)
- endif
-
- ! daniel: debugging
- !if( maxval(displ_crust_mantle(1,:)**2 + &
- ! displ_crust_mantle(2,:)**2 + displ_crust_mantle(3,:)**2) > 1.e4 ) then
- ! print*,'slice',myrank
- ! print*,' crust_mantle displ:', maxval(displ_crust_mantle(1,:)), &
- ! maxval(displ_crust_mantle(2,:)),maxval(displ_crust_mantle(3,:))
- ! print*,' indxs: ',maxloc( displ_crust_mantle(1,:)),maxloc( displ_crust_mantle(2,:)),maxloc( displ_crust_mantle(3,:))
- ! indx = maxloc( displ_crust_mantle(3,:) )
- ! rval = xstore_crust_mantle(indx(1))
- ! thetaval = ystore_crust_mantle(indx(1))
- ! phival = zstore_crust_mantle(indx(1))
- ! !thetaval = PI/2.0d0-datan(1.006760466d0*dcos(dble(thetaval))/dmax1(TINYVAL,dsin(dble(thetaval))))
- ! print*,'r/lat/lon:',rval*R_EARTH_KM,90.0-thetaval*180./PI,phival*180./PI
- ! call rthetaphi_2_xyz(rval,thetaval,phival,xstore_crust_mantle(indx(1)),&
- ! ystore_crust_mantle(indx(1)),zstore_crust_mantle(indx(1)))
- ! print*,'x/y/z:',rval,thetaval,phival
- ! call exit_MPI(myrank,'error stability')
- !endif
-
-
- ! compute the maximum of the norm of the displacement
- ! in all the slices using an MPI reduction
- ! and output timestamp file to check that simulation is running fine
- if(mod(it,NTSTEP_BETWEEN_OUTPUT_INFO) == 0 .or. it == 5 .or. it == NSTEP) &
- call check_simulation_stability(it,displ_crust_mantle,displ_inner_core,displ_outer_core, &
- b_displ_crust_mantle,b_displ_inner_core,b_displ_outer_core, &
- eps_trace_over_3_crust_mantle,epsilondev_crust_mantle, &
- SIMULATION_TYPE,OUTPUT_FILES,time_start,DT,t0,NSTEP, &
- myrank)
-
-
- ! ****************************************************
- ! big loop over all spectral elements in the fluid
- ! ****************************************************
-
- ! compute internal forces in the fluid region
- if(CUSTOM_REAL == SIZE_REAL) then
- time = sngl((dble(it-1)*DT-t0)*scale_t_inv)
- else
- time = (dble(it-1)*DT-t0)*scale_t_inv
- endif
-
- iphase = 0 ! do not start any non blocking communications at this stage
- icall = 1 ! compute all the outer elements first in the case of non blocking MPI
-
- if( USE_DEVILLE_PRODUCTS_VAL ) then
- ! uses Deville et al. (2002) routine
- call compute_forces_outer_core_Dev(time,deltat,two_omega_earth, &
- A_array_rotation,B_array_rotation,d_ln_density_dr_table, &
- minus_rho_g_over_kappa_fluid, &
- displ_outer_core,accel_outer_core,div_displ_outer_core, &
- xstore_outer_core,ystore_outer_core,zstore_outer_core, &
- xix_outer_core,xiy_outer_core,xiz_outer_core, &
- etax_outer_core,etay_outer_core,etaz_outer_core, &
- gammax_outer_core,gammay_outer_core,gammaz_outer_core, &
- is_on_a_slice_edge_outer_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
- npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
- iboolfaces_outer_core,iboolcorner_outer_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar,iphase,icall, &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- ibool_outer_core,MOVIE_VOLUME)
- else
- ! div_displ_outer_core is initialized to zero in the following subroutine.
- call compute_forces_outer_core(time,deltat,two_omega_earth, &
- A_array_rotation,B_array_rotation,d_ln_density_dr_table, &
- minus_rho_g_over_kappa_fluid, &
- displ_outer_core,accel_outer_core,div_displ_outer_core, &
- xstore_outer_core,ystore_outer_core,zstore_outer_core, &
- xix_outer_core,xiy_outer_core,xiz_outer_core, &
- etax_outer_core,etay_outer_core,etaz_outer_core, &
- gammax_outer_core,gammay_outer_core,gammaz_outer_core, &
- is_on_a_slice_edge_outer_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
- npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
- iboolfaces_outer_core,iboolcorner_outer_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar,iphase,icall, &
- hprime_xx,hprime_yy,hprime_zz,hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- ibool_outer_core,MOVIE_VOLUME)
- endif
-
- if (SIMULATION_TYPE == 3) then
- ! note on backward/reconstructed wavefields:
- ! time for b_displ( it=1 ) corresponds to (NSTEP - 1)*DT - t0 (after Newmark scheme...)
- ! as we start with saved wavefields b_displ( 1 ) <-> displ( NSTEP ) which correspond
- ! to a time (NSTEP - (it-1) - 1)*DT - t0
- ! for reconstructing the rotational contributions
- if(CUSTOM_REAL == SIZE_REAL) then
- time = sngl((dble(NSTEP-it)*DT-t0)*scale_t_inv)
- else
- time = (dble(NSTEP-it)*DT-t0)*scale_t_inv
- endif
-
- b_iphase = 0 ! do not start any non blocking communications at this stage
- b_icall = 1 ! compute all the outer elements first in the case of non blocking MPI
-
- if( USE_DEVILLE_PRODUCTS_VAL ) then
- ! uses Deville et al. (2002) routine
- call compute_forces_outer_core_Dev(time,b_deltat,b_two_omega_earth, &
- b_A_array_rotation,b_B_array_rotation,d_ln_density_dr_table, &
- minus_rho_g_over_kappa_fluid, &
- b_displ_outer_core,b_accel_outer_core,b_div_displ_outer_core, &
- xstore_outer_core,ystore_outer_core,zstore_outer_core, &
- xix_outer_core,xiy_outer_core,xiz_outer_core, &
- etax_outer_core,etay_outer_core,etaz_outer_core, &
- gammax_outer_core,gammay_outer_core,gammaz_outer_core, &
- is_on_a_slice_edge_outer_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
- npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
- iboolfaces_outer_core,iboolcorner_outer_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- b_buffer_send_faces,b_buffer_received_faces,npoin2D_max_all_CM_IC, &
- b_buffer_send_chunkcorn_scalar,b_buffer_recv_chunkcorn_scalar,b_iphase,b_icall, &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- ibool_outer_core,MOVIE_VOLUME)
- else
- call compute_forces_outer_core(time,b_deltat,b_two_omega_earth, &
- b_A_array_rotation,b_B_array_rotation,d_ln_density_dr_table, &
- minus_rho_g_over_kappa_fluid, &
- b_displ_outer_core,b_accel_outer_core,b_div_displ_outer_core, &
- xstore_outer_core,ystore_outer_core,zstore_outer_core, &
- xix_outer_core,xiy_outer_core,xiz_outer_core, &
- etax_outer_core,etay_outer_core,etaz_outer_core, &
- gammax_outer_core,gammay_outer_core,gammaz_outer_core, &
- is_on_a_slice_edge_outer_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
- npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
- iboolfaces_outer_core,iboolcorner_outer_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- b_buffer_send_faces,b_buffer_received_faces,npoin2D_max_all_CM_IC, &
- b_buffer_send_chunkcorn_scalar,b_buffer_recv_chunkcorn_scalar,b_iphase,b_icall, &
- hprime_xx,hprime_yy,hprime_zz,hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- ibool_outer_core,MOVIE_VOLUME)
- endif
- endif
-
- ! Stacey absorbing boundaries
- if(NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS) then
- call compute_stacey_outer_core(ichunk,SIMULATION_TYPE,SAVE_FORWARD, &
- NSTEP,it,ibool_outer_core, &
- veloc_outer_core,accel_outer_core,b_accel_outer_core, &
- vp_outer_core,wgllwgll_xz,wgllwgll_yz,wgllwgll_xy, &
- jacobian2D_bottom_outer_core, &
- jacobian2D_xmin_outer_core,jacobian2D_xmax_outer_core, &
- jacobian2D_ymin_outer_core,jacobian2D_ymax_outer_core, &
- ibelm_bottom_outer_core, &
- ibelm_xmin_outer_core,ibelm_xmax_outer_core, &
- ibelm_ymin_outer_core,ibelm_ymax_outer_core, &
- nimin_outer_core,nimax_outer_core, &
- njmin_outer_core,njmax_outer_core, &
- nkmin_xi_outer_core,nkmin_eta_outer_core, &
- NSPEC2D_BOTTOM, &
- nspec2D_xmin_outer_core,nspec2D_xmax_outer_core, &
- nspec2D_ymin_outer_core,nspec2D_ymax_outer_core, &
- reclen_zmin, &
- reclen_xmin_outer_core,reclen_xmax_outer_core, &
- reclen_ymin_outer_core,reclen_ymax_outer_core, &
- nabs_zmin_oc, &
- nabs_xmin_oc,nabs_xmax_oc,nabs_ymin_oc,nabs_ymax_oc, &
- absorb_zmin_outer_core, &
- absorb_xmin_outer_core,absorb_xmax_outer_core, &
- absorb_ymin_outer_core,absorb_ymax_outer_core)
- endif ! Stacey conditions
-
-
- ! ****************************************************
- ! ********** add matching with solid part **********
- ! ****************************************************
-
- ! only for elements in first matching layer in the fluid
-
- !---
- !--- couple with mantle at the top of the outer core
- !---
- if(ACTUALLY_COUPLE_FLUID_CMB) &
- call compute_coupling_fluid_CMB(displ_crust_mantle,b_displ_crust_mantle, &
- ibool_crust_mantle,ibelm_bottom_crust_mantle, &
- accel_outer_core,b_accel_outer_core, &
- normal_top_outer_core,jacobian2D_top_outer_core, &
- wgllwgll_xy,ibool_outer_core,ibelm_top_outer_core, &
- SIMULATION_TYPE,NSPEC2D_TOP(IREGION_OUTER_CORE))
-
- !---
- !--- couple with inner core at the bottom of the outer core
- !---
- if(ACTUALLY_COUPLE_FLUID_ICB) &
- call compute_coupling_fluid_ICB(displ_inner_core,b_displ_inner_core, &
- ibool_inner_core,ibelm_top_inner_core, &
- accel_outer_core,b_accel_outer_core, &
- normal_bottom_outer_core,jacobian2D_bottom_outer_core, &
- wgllwgll_xy,ibool_outer_core,ibelm_bottom_outer_core, &
- SIMULATION_TYPE,NSPEC2D_BOTTOM(IREGION_OUTER_CORE))
-
-
- ! assemble all the contributions between slices using MPI
-
- ! outer core
- if(USE_NONBLOCKING_COMMS) then
- iphase = 1 ! start the non blocking communications
- call assemble_MPI_scalar(myrank,accel_outer_core,NGLOB_OUTER_CORE, &
- iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
- npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
- iboolfaces_outer_core,iboolcorner_outer_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
- NUMMSGS_FACES,NCORNERSCHUNKS, &
- NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL(IREGION_OUTER_CORE), &
- NGLOB2DMAX_XMIN_XMAX(IREGION_OUTER_CORE),NGLOB2DMAX_YMIN_YMAX(IREGION_OUTER_CORE), &
- NGLOB2DMAX_XY,NCHUNKS_VAL,iphase)
-
- icall = 2 ! now compute all the inner elements in the case of non blocking MPI
-
- if( USE_DEVILLE_PRODUCTS_VAL ) then
- ! uses Deville et al. (2002) routine
- call compute_forces_outer_core_Dev(time,deltat,two_omega_earth, &
- A_array_rotation,B_array_rotation,d_ln_density_dr_table, &
- minus_rho_g_over_kappa_fluid,displ_outer_core,accel_outer_core,div_displ_outer_core, &
- xstore_outer_core,ystore_outer_core,zstore_outer_core, &
- xix_outer_core,xiy_outer_core,xiz_outer_core, &
- etax_outer_core,etay_outer_core,etaz_outer_core, &
- gammax_outer_core,gammay_outer_core,gammaz_outer_core, &
- is_on_a_slice_edge_outer_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
- npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
- iboolfaces_outer_core,iboolcorner_outer_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar,iphase,icall, &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- ibool_outer_core,MOVIE_VOLUME)
- else
- ! div_displ_outer_core is initialized to zero in the following subroutine.
- call compute_forces_outer_core(time,deltat,two_omega_earth, &
- A_array_rotation,B_array_rotation,d_ln_density_dr_table, &
- minus_rho_g_over_kappa_fluid,displ_outer_core,accel_outer_core,div_displ_outer_core, &
- xstore_outer_core,ystore_outer_core,zstore_outer_core, &
- xix_outer_core,xiy_outer_core,xiz_outer_core, &
- etax_outer_core,etay_outer_core,etaz_outer_core, &
- gammax_outer_core,gammay_outer_core,gammaz_outer_core, &
- is_on_a_slice_edge_outer_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
- npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
- iboolfaces_outer_core,iboolcorner_outer_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar,iphase,icall, &
- hprime_xx,hprime_yy,hprime_zz,hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- ibool_outer_core,MOVIE_VOLUME)
- endif
-
- do while (iphase <= 7) ! make sure the last communications are finished and processed
- call assemble_MPI_scalar(myrank,accel_outer_core,NGLOB_OUTER_CORE, &
- iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
- npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
- iboolfaces_outer_core,iboolcorner_outer_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
- NUMMSGS_FACES,NCORNERSCHUNKS, &
- NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL(IREGION_OUTER_CORE), &
- NGLOB2DMAX_XMIN_XMAX(IREGION_OUTER_CORE),NGLOB2DMAX_YMIN_YMAX(IREGION_OUTER_CORE), &
- NGLOB2DMAX_XY,NCHUNKS_VAL,iphase)
- enddo
-
- else ! if(.not. USE_NONBLOCKING_COMMS) then
-
- call assemble_MPI_scalar_block(myrank,accel_outer_core,NGLOB_OUTER_CORE, &
- iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_outer_core,iboolright_xi_outer_core, &
- iboolleft_eta_outer_core,iboolright_eta_outer_core, &
- npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
- iboolfaces_outer_core,iboolcorner_outer_core, &
- iprocfrom_faces,iprocto_faces,imsg_type, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
- NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
- NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL(IREGION_OUTER_CORE), &
- NGLOB2DMAX_XMIN_XMAX(IREGION_OUTER_CORE),NGLOB2DMAX_YMIN_YMAX(IREGION_OUTER_CORE), &
- NGLOB2DMAX_XY,NCHUNKS_VAL)
-
- endif
-
- ! multiply by the inverse of the mass matrix and update velocity
- do i=1,NGLOB_OUTER_CORE
- accel_outer_core(i) = accel_outer_core(i)*rmass_outer_core(i)
- veloc_outer_core(i) = veloc_outer_core(i) + deltatover2*accel_outer_core(i)
- enddo
-
- if (SIMULATION_TYPE == 3) then
-
-! ------------------- new non blocking implementation -------------------
-
- ! outer core
- if(USE_NONBLOCKING_COMMS) then
- b_iphase = 1 ! start the non blocking communications
- call assemble_MPI_scalar(myrank,b_accel_outer_core,NGLOB_OUTER_CORE, &
- iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
- npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
- iboolfaces_outer_core,iboolcorner_outer_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- b_buffer_send_faces,b_buffer_received_faces,npoin2D_max_all_CM_IC, &
- b_buffer_send_chunkcorn_scalar,b_buffer_recv_chunkcorn_scalar, &
- NUMMSGS_FACES,NCORNERSCHUNKS, &
- NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL(IREGION_OUTER_CORE), &
- NGLOB2DMAX_XMIN_XMAX(IREGION_OUTER_CORE),NGLOB2DMAX_YMIN_YMAX(IREGION_OUTER_CORE), &
- NGLOB2DMAX_XY,NCHUNKS_VAL,b_iphase)
-
- b_icall = 2 ! now compute all the inner elements in the case of non blocking MPI
-
- if( USE_DEVILLE_PRODUCTS_VAL ) then
- ! uses Deville et al. (2002) routine
- call compute_forces_outer_core_Dev(time,b_deltat,b_two_omega_earth, &
- b_A_array_rotation,b_B_array_rotation,d_ln_density_dr_table, &
- minus_rho_g_over_kappa_fluid, &
- b_displ_outer_core,b_accel_outer_core,b_div_displ_outer_core, &
- xstore_outer_core,ystore_outer_core,zstore_outer_core, &
- xix_outer_core,xiy_outer_core,xiz_outer_core, &
- etax_outer_core,etay_outer_core,etaz_outer_core, &
- gammax_outer_core,gammay_outer_core,gammaz_outer_core, &
- is_on_a_slice_edge_outer_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
- npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
- iboolfaces_outer_core,iboolcorner_outer_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- b_buffer_send_faces,b_buffer_received_faces,npoin2D_max_all_CM_IC, &
- b_buffer_send_chunkcorn_scalar,b_buffer_recv_chunkcorn_scalar,b_iphase,b_icall, &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- ibool_outer_core,MOVIE_VOLUME)
- else
- ! div_displ_outer_core is initialized to zero in the following subroutine.
- call compute_forces_outer_core(time,b_deltat,b_two_omega_earth, &
- b_A_array_rotation,b_B_array_rotation,d_ln_density_dr_table, &
- minus_rho_g_over_kappa_fluid, &
- b_displ_outer_core,b_accel_outer_core,b_div_displ_outer_core, &
- xstore_outer_core,ystore_outer_core,zstore_outer_core, &
- xix_outer_core,xiy_outer_core,xiz_outer_core, &
- etax_outer_core,etay_outer_core,etaz_outer_core, &
- gammax_outer_core,gammay_outer_core,gammaz_outer_core, &
- is_on_a_slice_edge_outer_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
- npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
- iboolfaces_outer_core,iboolcorner_outer_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- b_buffer_send_faces,b_buffer_received_faces,npoin2D_max_all_CM_IC, &
- b_buffer_send_chunkcorn_scalar,b_buffer_recv_chunkcorn_scalar,b_iphase,b_icall, &
- hprime_xx,hprime_yy,hprime_zz,hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- ibool_outer_core,MOVIE_VOLUME)
- endif
-
- do while (b_iphase <= 7) ! make sure the last communications are finished and processed
- call assemble_MPI_scalar(myrank,b_accel_outer_core,NGLOB_OUTER_CORE, &
- iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
- npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
- iboolfaces_outer_core,iboolcorner_outer_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- b_buffer_send_faces,b_buffer_received_faces,npoin2D_max_all_CM_IC, &
- b_buffer_send_chunkcorn_scalar,b_buffer_recv_chunkcorn_scalar, &
- NUMMSGS_FACES,NCORNERSCHUNKS, &
- NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL(IREGION_OUTER_CORE), &
- NGLOB2DMAX_XMIN_XMAX(IREGION_OUTER_CORE),NGLOB2DMAX_YMIN_YMAX(IREGION_OUTER_CORE), &
- NGLOB2DMAX_XY,NCHUNKS_VAL,b_iphase)
- enddo
-
- else ! if(.not. USE_NONBLOCKING_COMMS) then
-
- call assemble_MPI_scalar_block(myrank,b_accel_outer_core,NGLOB_OUTER_CORE, &
- iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_outer_core,iboolright_xi_outer_core, &
- iboolleft_eta_outer_core,iboolright_eta_outer_core, &
- npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
- iboolfaces_outer_core,iboolcorner_outer_core, &
- iprocfrom_faces,iprocto_faces,imsg_type, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- b_buffer_send_faces,b_buffer_received_faces,npoin2D_max_all_CM_IC, &
- b_buffer_send_chunkcorn_scalar,b_buffer_recv_chunkcorn_scalar, &
- NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
- NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL(IREGION_OUTER_CORE), &
- NGLOB2DMAX_XMIN_XMAX(IREGION_OUTER_CORE),NGLOB2DMAX_YMIN_YMAX(IREGION_OUTER_CORE), &
- NGLOB2DMAX_XY,NCHUNKS_VAL)
-
- endif
-
-! ------------------- new non blocking implementation -------------------
-
- ! Newmark time scheme - corrector for fluid parts
- do i=1,NGLOB_OUTER_CORE
- b_accel_outer_core(i) = b_accel_outer_core(i)*rmass_outer_core(i)
- b_veloc_outer_core(i) = b_veloc_outer_core(i) + b_deltatover2*b_accel_outer_core(i)
- enddo
-
- endif
-
- ! ****************************************************
- ! big loop over all spectral elements in the solid
- ! ****************************************************
-
- ! compute internal forces in the solid regions
-
- ! for anisotropy and gravity, x y and z contain r theta and phi
-
- iphase = 0 ! do not start any non blocking communications at this stage
- iphase_CC = 0 ! do not start any non blocking communications at this stage
- icall = 1 ! compute all the outer elements first in the case of non blocking MPI
-
- if( USE_DEVILLE_PRODUCTS_VAL ) then
- call compute_forces_crust_mantle_Dev(minus_gravity_table,density_table,minus_deriv_gravity_table, &
- displ_crust_mantle,accel_crust_mantle, &
- xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
-!----------------------
- is_on_a_slice_edge_crust_mantle,icall, &
- accel_inner_core,ibool_inner_core,idoubling_inner_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector,iphase, &
- nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
- npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
- receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,INCLUDE_CENTRAL_CUBE,iphase_CC, &
-!----------------------
- hprime_xx,hprime_xxT, &
- hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- kappavstore_crust_mantle,kappahstore_crust_mantle,muvstore_crust_mantle, &
- muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle, &
- c14store_crust_mantle,c15store_crust_mantle,c16store_crust_mantle, &
- c22store_crust_mantle,c23store_crust_mantle,c24store_crust_mantle, &
- c25store_crust_mantle,c26store_crust_mantle,c33store_crust_mantle, &
- c34store_crust_mantle,c35store_crust_mantle,c36store_crust_mantle, &
- c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- ! --idoubling_crust_mantle, &
- R_memory_crust_mantle,epsilondev_crust_mantle, &
- eps_trace_over_3_crust_mantle,one_minus_sum_beta_crust_mantle, &
- alphaval,betaval,gammaval,factor_common_crust_mantle, &
- size(factor_common_crust_mantle,2), size(factor_common_crust_mantle,3), &
- size(factor_common_crust_mantle,4), size(factor_common_crust_mantle,5) )
- else
- call compute_forces_crust_mantle(minus_gravity_table,density_table,minus_deriv_gravity_table, &
- displ_crust_mantle,accel_crust_mantle, &
- xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
-!----------------------
- is_on_a_slice_edge_crust_mantle,icall, &
- accel_inner_core,ibool_inner_core,idoubling_inner_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector,iphase, &
- nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
- npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
- receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,INCLUDE_CENTRAL_CUBE,iphase_CC, &
-!----------------------
- hprime_xx,hprime_yy,hprime_zz, &
- hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- kappavstore_crust_mantle,kappahstore_crust_mantle,muvstore_crust_mantle, &
- muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle, &
- c14store_crust_mantle,c15store_crust_mantle,c16store_crust_mantle, &
- c22store_crust_mantle,c23store_crust_mantle,c24store_crust_mantle, &
- c25store_crust_mantle,c26store_crust_mantle,c33store_crust_mantle, &
- c34store_crust_mantle,c35store_crust_mantle,c36store_crust_mantle, &
- c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- ! --idoubling_crust_mantle, &
- R_memory_crust_mantle,epsilondev_crust_mantle, &
- eps_trace_over_3_crust_mantle,one_minus_sum_beta_crust_mantle, &
- alphaval,betaval,gammaval,factor_common_crust_mantle, &
- size(factor_common_crust_mantle,2), size(factor_common_crust_mantle,3), &
- size(factor_common_crust_mantle,4), size(factor_common_crust_mantle,5) )
- endif
-
- if (SIMULATION_TYPE == 3 ) then
-
- b_iphase = 0 ! do not start any non blocking communications at this stage
- b_iphase_CC = 0 ! do not start any non blocking communications at this stage
- b_icall = 1 ! compute all the outer elements first in the case of non blocking MPI
-
- ! for anisotropy and gravity, x y and z contain r theta and phi
- if( USE_DEVILLE_PRODUCTS_VAL ) then
- call compute_forces_crust_mantle_Dev(minus_gravity_table,density_table,minus_deriv_gravity_table, &
- b_displ_crust_mantle,b_accel_crust_mantle, &
- xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
-!----------------------
- is_on_a_slice_edge_crust_mantle,b_icall, &
- b_accel_inner_core,ibool_inner_core,idoubling_inner_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- b_buffer_send_faces,b_buffer_received_faces,npoin2D_max_all_CM_IC, &
- b_buffer_send_chunkcorn_vector,b_buffer_recv_chunkcorn_vector,b_iphase, &
- nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
- npoin2D_cube_from_slices,b_buffer_all_cube_from_slices,b_buffer_slices,ibool_central_cube, &
- receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,INCLUDE_CENTRAL_CUBE,b_iphase_CC, &
-!----------------------
- hprime_xx,hprime_xxT, &
- hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- kappavstore_crust_mantle,kappahstore_crust_mantle,muvstore_crust_mantle, &
- muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle, &
- c14store_crust_mantle,c15store_crust_mantle,c16store_crust_mantle, &
- c22store_crust_mantle,c23store_crust_mantle,c24store_crust_mantle, &
- c25store_crust_mantle,c26store_crust_mantle,c33store_crust_mantle, &
- c34store_crust_mantle,c35store_crust_mantle,c36store_crust_mantle, &
- c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- ! -- idoubling_crust_mantle, &
- b_R_memory_crust_mantle,b_epsilondev_crust_mantle, &
- b_eps_trace_over_3_crust_mantle,one_minus_sum_beta_crust_mantle, &
- b_alphaval,b_betaval,b_gammaval,factor_common_crust_mantle, &
- size(factor_common_crust_mantle,2), size(factor_common_crust_mantle,3), &
- size(factor_common_crust_mantle,4), size(factor_common_crust_mantle,5) )
- else
- call compute_forces_crust_mantle(minus_gravity_table,density_table,minus_deriv_gravity_table, &
- b_displ_crust_mantle,b_accel_crust_mantle, &
- xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
-!----------------------
- is_on_a_slice_edge_crust_mantle,b_icall, &
- b_accel_inner_core,ibool_inner_core,idoubling_inner_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- b_buffer_send_faces,b_buffer_received_faces,npoin2D_max_all_CM_IC, &
- b_buffer_send_chunkcorn_vector,b_buffer_recv_chunkcorn_vector,b_iphase, &
- nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
- npoin2D_cube_from_slices,b_buffer_all_cube_from_slices,b_buffer_slices,ibool_central_cube, &
- receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,INCLUDE_CENTRAL_CUBE,b_iphase_CC, &
-!----------------------
- hprime_xx,hprime_yy,hprime_zz, &
- hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- kappavstore_crust_mantle,kappahstore_crust_mantle,muvstore_crust_mantle, &
- muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle, &
- c14store_crust_mantle,c15store_crust_mantle,c16store_crust_mantle, &
- c22store_crust_mantle,c23store_crust_mantle,c24store_crust_mantle, &
- c25store_crust_mantle,c26store_crust_mantle,c33store_crust_mantle, &
- c34store_crust_mantle,c35store_crust_mantle,c36store_crust_mantle, &
- c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- ! --idoubling_crust_mantle, &
- b_R_memory_crust_mantle,b_epsilondev_crust_mantle, &
- b_eps_trace_over_3_crust_mantle,one_minus_sum_beta_crust_mantle, &
- b_alphaval,b_betaval,b_gammaval,factor_common_crust_mantle, &
- size(factor_common_crust_mantle,2), size(factor_common_crust_mantle,3), &
- size(factor_common_crust_mantle,4), size(factor_common_crust_mantle,5) )
-
- endif
- endif
-
- ! Deville routine
- if( USE_DEVILLE_PRODUCTS_VAL ) then
- call compute_forces_inner_core_Dev(minus_gravity_table,density_table,minus_deriv_gravity_table, &
- displ_inner_core,accel_inner_core, &
- xstore_inner_core,ystore_inner_core,zstore_inner_core, &
- xix_inner_core,xiy_inner_core,xiz_inner_core, &
- etax_inner_core,etay_inner_core,etaz_inner_core, &
- gammax_inner_core,gammay_inner_core,gammaz_inner_core, &
-!----------------------
- is_on_a_slice_edge_inner_core,icall, &
- accel_crust_mantle,ibool_inner_core,idoubling_inner_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector,iphase, &
- nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
- npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
- receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,INCLUDE_CENTRAL_CUBE,iphase_CC, &
-!----------------------
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- kappavstore_inner_core,muvstore_inner_core,ibool_inner_core,idoubling_inner_core, &
- c11store_inner_core,c33store_inner_core,c12store_inner_core, &
- c13store_inner_core,c44store_inner_core, &
- R_memory_inner_core,epsilondev_inner_core, eps_trace_over_3_inner_core,&
- one_minus_sum_beta_inner_core, &
- alphaval,betaval,gammaval, &
- factor_common_inner_core, &
- size(factor_common_inner_core,2), size(factor_common_inner_core,3), &
- size(factor_common_inner_core,4), size(factor_common_inner_core,5) )
- else
- call compute_forces_inner_core(minus_gravity_table,density_table,minus_deriv_gravity_table, &
- displ_inner_core,accel_inner_core, &
- xstore_inner_core,ystore_inner_core,zstore_inner_core, &
- xix_inner_core,xiy_inner_core,xiz_inner_core, &
- etax_inner_core,etay_inner_core,etaz_inner_core, &
- gammax_inner_core,gammay_inner_core,gammaz_inner_core, &
-!----------------------
- is_on_a_slice_edge_inner_core,icall, &
- accel_crust_mantle,ibool_inner_core,idoubling_inner_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector,iphase, &
- nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
- npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
- receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,INCLUDE_CENTRAL_CUBE,iphase_CC, &
-!----------------------
- hprime_xx,hprime_yy,hprime_zz,hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- kappavstore_inner_core,muvstore_inner_core,ibool_inner_core,idoubling_inner_core, &
- c11store_inner_core,c33store_inner_core,c12store_inner_core, &
- c13store_inner_core,c44store_inner_core, &
- R_memory_inner_core,epsilondev_inner_core, eps_trace_over_3_inner_core,&
- one_minus_sum_beta_inner_core, &
- alphaval,betaval,gammaval, &
- factor_common_inner_core, &
- size(factor_common_inner_core,2), size(factor_common_inner_core,3), &
- size(factor_common_inner_core,4), size(factor_common_inner_core,5) )
- endif
-
- if (SIMULATION_TYPE == 3) then
- if( USE_DEVILLE_PRODUCTS_VAL ) then
- call compute_forces_inner_core_Dev(minus_gravity_table,density_table,minus_deriv_gravity_table, &
- b_displ_inner_core,b_accel_inner_core, &
- xstore_inner_core,ystore_inner_core,zstore_inner_core, &
- xix_inner_core,xiy_inner_core,xiz_inner_core, &
- etax_inner_core,etay_inner_core,etaz_inner_core, &
- gammax_inner_core,gammay_inner_core,gammaz_inner_core, &
-!----------------------
- is_on_a_slice_edge_inner_core,b_icall, &
- b_accel_crust_mantle,ibool_inner_core,idoubling_inner_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- b_buffer_send_faces,b_buffer_received_faces,npoin2D_max_all_CM_IC, &
- b_buffer_send_chunkcorn_vector,b_buffer_recv_chunkcorn_vector,b_iphase, &
- nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
- npoin2D_cube_from_slices,b_buffer_all_cube_from_slices,b_buffer_slices,ibool_central_cube, &
- receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,INCLUDE_CENTRAL_CUBE,b_iphase_CC, &
-!----------------------
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- kappavstore_inner_core,muvstore_inner_core,ibool_inner_core,idoubling_inner_core, &
- c11store_inner_core,c33store_inner_core,c12store_inner_core, &
- c13store_inner_core,c44store_inner_core, &
- b_R_memory_inner_core,b_epsilondev_inner_core, b_eps_trace_over_3_inner_core,&
- one_minus_sum_beta_inner_core, &
- b_alphaval,b_betaval,b_gammaval, &
- factor_common_inner_core, &
- size(factor_common_inner_core,2), size(factor_common_inner_core,3), &
- size(factor_common_inner_core,4), size(factor_common_inner_core,5) )
- else
- call compute_forces_inner_core(minus_gravity_table,density_table,minus_deriv_gravity_table, &
- b_displ_inner_core,b_accel_inner_core, &
- xstore_inner_core,ystore_inner_core,zstore_inner_core, &
- xix_inner_core,xiy_inner_core,xiz_inner_core, &
- etax_inner_core,etay_inner_core,etaz_inner_core, &
- gammax_inner_core,gammay_inner_core,gammaz_inner_core, &
-!----------------------
- is_on_a_slice_edge_inner_core,b_icall, &
- b_accel_crust_mantle,ibool_inner_core,idoubling_inner_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- b_buffer_send_faces,b_buffer_received_faces,npoin2D_max_all_CM_IC, &
- b_buffer_send_chunkcorn_vector,b_buffer_recv_chunkcorn_vector,b_iphase, &
- nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
- npoin2D_cube_from_slices,b_buffer_all_cube_from_slices,b_buffer_slices,ibool_central_cube, &
- receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,INCLUDE_CENTRAL_CUBE,b_iphase_CC, &
-!----------------------
- hprime_xx,hprime_yy,hprime_zz,hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- kappavstore_inner_core,muvstore_inner_core,ibool_inner_core,idoubling_inner_core, &
- c11store_inner_core,c33store_inner_core,c12store_inner_core, &
- c13store_inner_core,c44store_inner_core, &
- b_R_memory_inner_core,b_epsilondev_inner_core, b_eps_trace_over_3_inner_core,&
- one_minus_sum_beta_inner_core, &
- b_alphaval,b_betaval,b_gammaval, &
- factor_common_inner_core, &
- size(factor_common_inner_core,2), size(factor_common_inner_core,3), &
- size(factor_common_inner_core,4), size(factor_common_inner_core,5) )
- endif
- endif
-
- ! Stacey
- if(NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS) then
- call compute_stacey_crust_mantle(ichunk,SIMULATION_TYPE, &
- NSTEP,it,SAVE_FORWARD,ibool_crust_mantle, &
- veloc_crust_mantle,accel_crust_mantle,b_accel_crust_mantle, &
- jacobian2D_xmin_crust_mantle,jacobian2D_xmax_crust_mantle, &
- jacobian2D_ymin_crust_mantle,jacobian2D_ymax_crust_mantle, &
- wgllwgll_xz,wgllwgll_yz, &
- normal_xmin_crust_mantle,normal_xmax_crust_mantle, &
- normal_ymin_crust_mantle,normal_ymax_crust_mantle, &
- rho_vp_crust_mantle,rho_vs_crust_mantle, &
- ibelm_xmin_crust_mantle,ibelm_xmax_crust_mantle, &
- ibelm_ymin_crust_mantle,ibelm_ymax_crust_mantle, &
- nimin_crust_mantle,nimax_crust_mantle, &
- njmin_crust_mantle,njmax_crust_mantle, &
- nkmin_xi_crust_mantle,nkmin_eta_crust_mantle, &
- nspec2D_xmin_crust_mantle,nspec2D_xmax_crust_mantle, &
- nspec2D_ymin_crust_mantle,nspec2D_ymax_crust_mantle, &
- reclen_xmin_crust_mantle,reclen_xmax_crust_mantle, &
- reclen_ymin_crust_mantle,reclen_ymax_crust_mantle, &
- nabs_xmin_cm,nabs_xmax_cm,nabs_ymin_cm,nabs_ymax_cm, &
- absorb_xmin_crust_mantle5,absorb_xmax_crust_mantle5, &
- absorb_ymin_crust_mantle5,absorb_ymax_crust_mantle5)
- endif ! Stacey conditions
-
- ! add the sources
- if (SIMULATION_TYPE == 1) &
- call compute_add_sources(myrank,NSOURCES, &
- accel_crust_mantle,sourcearrays, &
- DT,t0,tshift_cmt,hdur_gaussian,ibool_crust_mantle, &
- islice_selected_source,ispec_selected_source,it, &
- hdur,xi_source,eta_source,gamma_source,nu_source)
-
- ! add adjoint sources
- if (SIMULATION_TYPE == 2 .or. SIMULATION_TYPE == 3) then
- if( nadj_rec_local > 0 ) &
- call compute_add_sources_adjoint(myrank,nrec, &
- nadj_rec_local,NSTEP,NTSTEP_BETWEEN_READ_ADJSRC, &
- accel_crust_mantle,adj_sourcearrays, &
- nu,xi_receiver,eta_receiver,gamma_receiver, &
- xigll,yigll,zigll,ibool_crust_mantle, &
- islice_selected_rec,ispec_selected_rec, &
- NSTEP_SUB_ADJ,iadjsrc_len,iadjsrc,iadj_vec, &
- it,it_begin,station_name,network_name,DT)
- endif
-
- ! add sources for backward/reconstructed wavefield
- if (SIMULATION_TYPE == 3) &
- call compute_add_sources_backward(myrank,NSOURCES,NSTEP, &
- b_accel_crust_mantle,sourcearrays, &
- DT,t0,tshift_cmt,hdur_gaussian,ibool_crust_mantle, &
- islice_selected_source,ispec_selected_source,it, &
- hdur,xi_source,eta_source,gamma_source,nu_source)
-
-!<YANGL
- ! NOISE_TOMOGRAPHY
- if ( NOISE_TOMOGRAPHY == 1 ) then
- ! the first step of noise tomography is to use |S(\omega)|^2 as a point force source at one of the receivers.
- ! hence, instead of a moment tensor 'sourcearrays', a 'noise_sourcearray' for a point force is needed.
- ! furthermore, the CMTSOLUTION needs to be zero, i.e., no earthquakes.
- ! now this must be manually set in DATA/CMTSOLUTION, by USERS.
- call add_source_master_rec_noise(myrank,nrec, &
- NSTEP,accel_crust_mantle,noise_sourcearray, &
- ibool_crust_mantle,islice_selected_rec,ispec_selected_rec, &
- it,irec_master_noise)
- elseif ( NOISE_TOMOGRAPHY == 2 ) then
- ! second step of noise tomography, i.e., read the surface movie saved at every timestep
- ! use the movie to drive the ensemble forward wavefield
- call noise_read_add_surface_movie(nmovie_points,accel_crust_mantle, &
- normal_x_noise,normal_y_noise,normal_z_noise,mask_noise, &
- ibelm_top_crust_mantle,ibool_crust_mantle, &
- NSPEC2D_TOP(IREGION_CRUST_MANTLE),noise_surface_movie, &
- NSTEP-it+1,jacobian2D_top_crust_mantle,wgllwgll_xy)
- ! be careful, since ensemble forward sources are reversals of generating wavefield "eta"
- ! hence the "NSTEP-it+1", i.e., start reading from the last timestep
- ! note the ensemble forward sources are generally distributed on the surface of the earth
- ! that's to say, the ensemble forward source is kind of a surface force density, not a body force density
- ! therefore, we must add it here, before applying the inverse of mass matrix
- elseif ( NOISE_TOMOGRAPHY == 3 ) then
- ! third step of noise tomography, i.e., read the surface movie saved at every timestep
- ! use the movie to reconstruct the ensemble forward wavefield
- ! the ensemble adjoint wavefield is done as usual
- ! note instead of "NSTEP-it+1", now we us "it", since reconstruction is a reversal of reversal
- call noise_read_add_surface_movie(nmovie_points,b_accel_crust_mantle, &
- normal_x_noise,normal_y_noise,normal_z_noise,mask_noise, &
- ibelm_top_crust_mantle,ibool_crust_mantle, &
- NSPEC2D_TOP(IREGION_CRUST_MANTLE),noise_surface_movie, &
- it,jacobian2D_top_crust_mantle,wgllwgll_xy)
- endif
-!>YANGL
-
- ! ****************************************************
- ! ********** add matching with fluid part **********
- ! ****************************************************
-
- ! only for elements in first matching layer in the solid
-
- !---
- !--- couple with outer core at the bottom of the mantle
- !---
- if(ACTUALLY_COUPLE_FLUID_CMB) &
- call compute_coupling_CMB_fluid(displ_crust_mantle,b_displ_crust_mantle, &
- accel_crust_mantle,b_accel_crust_mantle, &
- ibool_crust_mantle,ibelm_bottom_crust_mantle, &
- accel_outer_core,b_accel_outer_core, &
- normal_top_outer_core,jacobian2D_top_outer_core, &
- wgllwgll_xy,ibool_outer_core,ibelm_top_outer_core, &
- RHO_TOP_OC,minus_g_cmb, &
- SIMULATION_TYPE,NSPEC2D_BOTTOM(IREGION_CRUST_MANTLE))
-
- !---
- !--- couple with outer core at the top of the inner core
- !---
- if(ACTUALLY_COUPLE_FLUID_ICB) &
- call compute_coupling_ICB_fluid(displ_inner_core,b_displ_inner_core, &
- accel_inner_core,b_accel_inner_core, &
- ibool_inner_core,ibelm_top_inner_core, &
- accel_outer_core,b_accel_outer_core, &
- normal_bottom_outer_core,jacobian2D_bottom_outer_core, &
- wgllwgll_xy,ibool_outer_core,ibelm_bottom_outer_core, &
- RHO_BOTTOM_OC,minus_g_icb, &
- SIMULATION_TYPE,NSPEC2D_TOP(IREGION_INNER_CORE))
-
-
- ! assemble all the contributions between slices using MPI
-
-! assemble all the contributions between slices using MPI
-! crust/mantle and inner core handled in the same call
-! in order to reduce the number of MPI messages by 2
- if(USE_NONBLOCKING_COMMS) then
-
- iphase = 1 ! initialize the non blocking communication counter
- iphase_CC = 1 ! initialize the non blocking communication counter for the central cube
-
-! start the non blocking communications
- call assemble_MPI_vector(myrank,accel_crust_mantle,accel_inner_core, &
- iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector, &
- NUMMSGS_FACES,NCORNERSCHUNKS, &
- NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL(IREGION_CRUST_MANTLE), &
- NGLOB1D_RADIAL(IREGION_INNER_CORE),NCHUNKS_VAL,iphase)
-
- icall = 2 ! now compute all the inner elements in the case of non blocking MPI
-
- ! compute internal forces in the solid regions
-
- ! for anisotropy and gravity, x y and z contain r theta and phi
-
- if( USE_DEVILLE_PRODUCTS_VAL ) then
- call compute_forces_crust_mantle_Dev(minus_gravity_table,density_table,minus_deriv_gravity_table, &
- displ_crust_mantle,accel_crust_mantle, &
- xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
-!----------------------
- is_on_a_slice_edge_crust_mantle,icall, &
- accel_inner_core,ibool_inner_core,idoubling_inner_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector,iphase, &
- nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
- npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
- receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,INCLUDE_CENTRAL_CUBE,iphase_CC, &
-!----------------------
- hprime_xx,hprime_xxT, &
- hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- kappavstore_crust_mantle,kappahstore_crust_mantle,muvstore_crust_mantle, &
- muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle, &
- c14store_crust_mantle,c15store_crust_mantle,c16store_crust_mantle, &
- c22store_crust_mantle,c23store_crust_mantle,c24store_crust_mantle, &
- c25store_crust_mantle,c26store_crust_mantle,c33store_crust_mantle, &
- c34store_crust_mantle,c35store_crust_mantle,c36store_crust_mantle, &
- c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- !---idoubling_crust_mantle, &
- R_memory_crust_mantle,epsilondev_crust_mantle, &
- eps_trace_over_3_crust_mantle,one_minus_sum_beta_crust_mantle, &
- alphaval,betaval,gammaval,factor_common_crust_mantle, &
- size(factor_common_crust_mantle,2), size(factor_common_crust_mantle,3), &
- size(factor_common_crust_mantle,4), size(factor_common_crust_mantle,5) )
- else
- call compute_forces_crust_mantle(minus_gravity_table,density_table,minus_deriv_gravity_table, &
- displ_crust_mantle,accel_crust_mantle, &
- xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
-!----------------------
- is_on_a_slice_edge_crust_mantle,icall, &
- accel_inner_core,ibool_inner_core,idoubling_inner_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector,iphase, &
- nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
- npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
- receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,INCLUDE_CENTRAL_CUBE,iphase_CC, &
-!----------------------
- hprime_xx,hprime_yy,hprime_zz, &
- hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- kappavstore_crust_mantle,kappahstore_crust_mantle,muvstore_crust_mantle, &
- muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle, &
- c14store_crust_mantle,c15store_crust_mantle,c16store_crust_mantle, &
- c22store_crust_mantle,c23store_crust_mantle,c24store_crust_mantle, &
- c25store_crust_mantle,c26store_crust_mantle,c33store_crust_mantle, &
- c34store_crust_mantle,c35store_crust_mantle,c36store_crust_mantle, &
- c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- ! --idoubling_crust_mantle, &
- R_memory_crust_mantle,epsilondev_crust_mantle, &
- eps_trace_over_3_crust_mantle,one_minus_sum_beta_crust_mantle, &
- alphaval,betaval,gammaval,factor_common_crust_mantle, &
- size(factor_common_crust_mantle,2), size(factor_common_crust_mantle,3), &
- size(factor_common_crust_mantle,4), size(factor_common_crust_mantle,5) )
- endif
-
- ! Deville routine
- if( USE_DEVILLE_PRODUCTS_VAL ) then
- call compute_forces_inner_core_Dev(minus_gravity_table,density_table,minus_deriv_gravity_table, &
- displ_inner_core,accel_inner_core, &
- xstore_inner_core,ystore_inner_core,zstore_inner_core, &
- xix_inner_core,xiy_inner_core,xiz_inner_core, &
- etax_inner_core,etay_inner_core,etaz_inner_core, &
- gammax_inner_core,gammay_inner_core,gammaz_inner_core, &
-!----------------------
- is_on_a_slice_edge_inner_core,icall, &
- accel_crust_mantle,ibool_inner_core,idoubling_inner_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector,iphase, &
- nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
- npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
- receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,INCLUDE_CENTRAL_CUBE,iphase_CC, &
-!----------------------
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- kappavstore_inner_core,muvstore_inner_core,ibool_inner_core,idoubling_inner_core, &
- c11store_inner_core,c33store_inner_core,c12store_inner_core, &
- c13store_inner_core,c44store_inner_core, &
- R_memory_inner_core,epsilondev_inner_core, eps_trace_over_3_inner_core,&
- one_minus_sum_beta_inner_core, &
- alphaval,betaval,gammaval, &
- factor_common_inner_core, &
- size(factor_common_inner_core,2), size(factor_common_inner_core,3), &
- size(factor_common_inner_core,4), size(factor_common_inner_core,5) )
- else
- call compute_forces_inner_core(minus_gravity_table,density_table,minus_deriv_gravity_table, &
- displ_inner_core,accel_inner_core, &
- xstore_inner_core,ystore_inner_core,zstore_inner_core, &
- xix_inner_core,xiy_inner_core,xiz_inner_core, &
- etax_inner_core,etay_inner_core,etaz_inner_core, &
- gammax_inner_core,gammay_inner_core,gammaz_inner_core, &
-!----------------------
- is_on_a_slice_edge_inner_core,icall, &
- accel_crust_mantle,ibool_inner_core,idoubling_inner_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector,iphase, &
- nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
- npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
- receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,INCLUDE_CENTRAL_CUBE,iphase_CC, &
-!----------------------
- hprime_xx,hprime_yy,hprime_zz,hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- kappavstore_inner_core,muvstore_inner_core,ibool_inner_core,idoubling_inner_core, &
- c11store_inner_core,c33store_inner_core,c12store_inner_core, &
- c13store_inner_core,c44store_inner_core, &
- R_memory_inner_core,epsilondev_inner_core, eps_trace_over_3_inner_core,&
- one_minus_sum_beta_inner_core, &
- alphaval,betaval,gammaval, &
- factor_common_inner_core, &
- size(factor_common_inner_core,2), size(factor_common_inner_core,3), &
- size(factor_common_inner_core,4), size(factor_common_inner_core,5) )
- endif
-
-! assemble all the contributions between slices using MPI
-! crust/mantle and inner core handled in the same call
-! in order to reduce the number of MPI messages by 2
- do while (iphase <= 7) ! make sure the last communications are finished and processed
- call assemble_MPI_vector(myrank,accel_crust_mantle,accel_inner_core, &
- iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector, &
- NUMMSGS_FACES,NCORNERSCHUNKS, &
- NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL(IREGION_CRUST_MANTLE), &
- NGLOB1D_RADIAL(IREGION_INNER_CORE),NCHUNKS_VAL,iphase)
- enddo
- else
- ! crust/mantle and inner core handled in the same call
- ! in order to reduce the number of MPI messages by 2
- call assemble_MPI_vector_block(myrank, &
- accel_crust_mantle,NGLOB_CRUST_MANTLE, &
- accel_inner_core,NGLOB_INNER_CORE, &
- iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle, &
- iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core, &
- iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces,imsg_type, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces,buffer_received_faces, &
- buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector, &
- NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
- NPROC_XI_VAL,NPROC_ETA_VAL, &
- NGLOB1D_RADIAL(IREGION_CRUST_MANTLE), &
- NGLOB2DMAX_XMIN_XMAX(IREGION_CRUST_MANTLE),NGLOB2DMAX_YMIN_YMAX(IREGION_CRUST_MANTLE), &
- NGLOB1D_RADIAL(IREGION_INNER_CORE), &
- NGLOB2DMAX_XMIN_XMAX(IREGION_INNER_CORE),NGLOB2DMAX_YMIN_YMAX(IREGION_INNER_CORE), &
- NGLOB2DMAX_XY,NCHUNKS_VAL)
- endif
-
- !---
- !--- use buffers to assemble forces with the central cube
- !---
-
- if(INCLUDE_CENTRAL_CUBE) then
- if(USE_NONBLOCKING_COMMS) then
- do while (iphase_CC <= 4) ! make sure the last communications are finished and processed
- call assemble_MPI_central_cube(ichunk,nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
- npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
- receiver_cube_from_slices,ibool_inner_core,idoubling_inner_core, &
- ibelm_bottom_inner_core,NSPEC2D_BOTTOM(IREGION_INNER_CORE),accel_inner_core,NDIM,iphase_CC)
- enddo
- else
- call assemble_MPI_central_cube_block(ichunk,nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
- npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,buffer_slices2,ibool_central_cube, &
- receiver_cube_from_slices,ibool_inner_core,idoubling_inner_core,NSPEC_INNER_CORE, &
- ibelm_bottom_inner_core,NSPEC2D_BOTTOM(IREGION_INNER_CORE),NGLOB_INNER_CORE,accel_inner_core,NDIM)
- endif
- endif ! end of assembling forces with the central cube
-
-#ifdef _HANDOPT
-! way 2:
- if(imodulo_NGLOB_CRUST_MANTLE4 >= 1) then
- do i=1,imodulo_NGLOB_CRUST_MANTLE4
- accel_crust_mantle(1,i) = accel_crust_mantle(1,i)*rmass_crust_mantle(i) &
- + two_omega_earth*veloc_crust_mantle(2,i)
- accel_crust_mantle(2,i) = accel_crust_mantle(2,i)*rmass_crust_mantle(i) &
- - two_omega_earth*veloc_crust_mantle(1,i)
- accel_crust_mantle(3,i) = accel_crust_mantle(3,i)*rmass_crust_mantle(i)
- enddo
- endif
- do i=imodulo_NGLOB_CRUST_MANTLE4+1,NGLOB_CRUST_MANTLE,4
- accel_crust_mantle(1,i) = accel_crust_mantle(1,i)*rmass_crust_mantle(i) &
- + two_omega_earth*veloc_crust_mantle(2,i)
- accel_crust_mantle(2,i) = accel_crust_mantle(2,i)*rmass_crust_mantle(i) &
- - two_omega_earth*veloc_crust_mantle(1,i)
- accel_crust_mantle(3,i) = accel_crust_mantle(3,i)*rmass_crust_mantle(i)
-
- accel_crust_mantle(1,i+1) = accel_crust_mantle(1,i+1)*rmass_crust_mantle(i+1) &
- + two_omega_earth*veloc_crust_mantle(2,i+1)
- accel_crust_mantle(2,i+1) = accel_crust_mantle(2,i+1)*rmass_crust_mantle(i+1) &
- - two_omega_earth*veloc_crust_mantle(1,i+1)
- accel_crust_mantle(3,i+1) = accel_crust_mantle(3,i+1)*rmass_crust_mantle(i+1)
-
- accel_crust_mantle(1,i+2) = accel_crust_mantle(1,i+2)*rmass_crust_mantle(i+2) &
- + two_omega_earth*veloc_crust_mantle(2,i+2)
- accel_crust_mantle(2,i+2) = accel_crust_mantle(2,i+2)*rmass_crust_mantle(i+2) &
- - two_omega_earth*veloc_crust_mantle(1,i+2)
- accel_crust_mantle(3,i+2) = accel_crust_mantle(3,i+2)*rmass_crust_mantle(i+2)
-
- accel_crust_mantle(1,i+3) = accel_crust_mantle(1,i+3)*rmass_crust_mantle(i+3) &
- + two_omega_earth*veloc_crust_mantle(2,i+3)
- accel_crust_mantle(2,i+3) = accel_crust_mantle(2,i+3)*rmass_crust_mantle(i+3) &
- - two_omega_earth*veloc_crust_mantle(1,i+3)
- accel_crust_mantle(3,i+3) = accel_crust_mantle(3,i+3)*rmass_crust_mantle(i+3)
- enddo
-#else
-! way 1:
- do i=1,NGLOB_CRUST_MANTLE
- accel_crust_mantle(1,i) = accel_crust_mantle(1,i)*rmass_crust_mantle(i) &
- + two_omega_earth*veloc_crust_mantle(2,i)
- accel_crust_mantle(2,i) = accel_crust_mantle(2,i)*rmass_crust_mantle(i) &
- - two_omega_earth*veloc_crust_mantle(1,i)
- accel_crust_mantle(3,i) = accel_crust_mantle(3,i)*rmass_crust_mantle(i)
- enddo
-#endif
-
- if (SIMULATION_TYPE == 3) then
-
-! ------------------- new non blocking implementation -------------------
-
- ! assemble all the contributions between slices using MPI
-
-! assemble all the contributions between slices using MPI
-! crust/mantle and inner core handled in the same call
-! in order to reduce the number of MPI messages by 2
- if(USE_NONBLOCKING_COMMS) then
-
- b_iphase = 1 ! initialize the non blocking communication counter
- b_iphase_CC = 1 ! initialize the non blocking communication counter for the central cube
-
-! start the non blocking communications
- call assemble_MPI_vector(myrank,b_accel_crust_mantle,b_accel_inner_core, &
- iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- b_buffer_send_faces,b_buffer_received_faces,npoin2D_max_all_CM_IC, &
- b_buffer_send_chunkcorn_vector,b_buffer_recv_chunkcorn_vector, &
- NUMMSGS_FACES,NCORNERSCHUNKS, &
- NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL(IREGION_CRUST_MANTLE), &
- NGLOB1D_RADIAL(IREGION_INNER_CORE),NCHUNKS_VAL,b_iphase)
-
- b_icall = 2 ! now compute all the inner elements in the case of non blocking MPI
-
- ! compute internal forces in the solid regions
-
- ! for anisotropy and gravity, x y and z contain r theta and phi
-
- if( USE_DEVILLE_PRODUCTS_VAL ) then
- call compute_forces_crust_mantle_Dev(minus_gravity_table,density_table,minus_deriv_gravity_table, &
- b_displ_crust_mantle,b_accel_crust_mantle, &
- xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
-!----------------------
- is_on_a_slice_edge_crust_mantle,b_icall, &
- b_accel_inner_core,ibool_inner_core,idoubling_inner_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- b_buffer_send_faces,b_buffer_received_faces,npoin2D_max_all_CM_IC, &
- b_buffer_send_chunkcorn_vector,b_buffer_recv_chunkcorn_vector,b_iphase, &
- nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
- npoin2D_cube_from_slices,b_buffer_all_cube_from_slices,b_buffer_slices,ibool_central_cube, &
- receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,INCLUDE_CENTRAL_CUBE,b_iphase_CC, &
-!----------------------
- hprime_xx,hprime_xxT, &
- hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- kappavstore_crust_mantle,kappahstore_crust_mantle,muvstore_crust_mantle, &
- muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle, &
- c14store_crust_mantle,c15store_crust_mantle,c16store_crust_mantle, &
- c22store_crust_mantle,c23store_crust_mantle,c24store_crust_mantle, &
- c25store_crust_mantle,c26store_crust_mantle,c33store_crust_mantle, &
- c34store_crust_mantle,c35store_crust_mantle,c36store_crust_mantle, &
- c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- ! --idoubling_crust_mantle, &
- b_R_memory_crust_mantle,b_epsilondev_crust_mantle, &
- b_eps_trace_over_3_crust_mantle,one_minus_sum_beta_crust_mantle, &
- b_alphaval,b_betaval,b_gammaval,factor_common_crust_mantle, &
- size(factor_common_crust_mantle,2), size(factor_common_crust_mantle,3), &
- size(factor_common_crust_mantle,4), size(factor_common_crust_mantle,5) )
- else
- call compute_forces_crust_mantle(minus_gravity_table,density_table,minus_deriv_gravity_table, &
- b_displ_crust_mantle,b_accel_crust_mantle, &
- xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
-!----------------------
- is_on_a_slice_edge_crust_mantle,b_icall, &
- b_accel_inner_core,ibool_inner_core,idoubling_inner_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- b_buffer_send_faces,b_buffer_received_faces,npoin2D_max_all_CM_IC, &
- b_buffer_send_chunkcorn_vector,b_buffer_recv_chunkcorn_vector,b_iphase, &
- nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
- npoin2D_cube_from_slices,b_buffer_all_cube_from_slices,b_buffer_slices,ibool_central_cube, &
- receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,INCLUDE_CENTRAL_CUBE,b_iphase_CC, &
-!----------------------
- hprime_xx,hprime_yy,hprime_zz, &
- hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- kappavstore_crust_mantle,kappahstore_crust_mantle,muvstore_crust_mantle, &
- muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle, &
- c14store_crust_mantle,c15store_crust_mantle,c16store_crust_mantle, &
- c22store_crust_mantle,c23store_crust_mantle,c24store_crust_mantle, &
- c25store_crust_mantle,c26store_crust_mantle,c33store_crust_mantle, &
- c34store_crust_mantle,c35store_crust_mantle,c36store_crust_mantle, &
- c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- !--idoubling_crust_mantle, &
- b_R_memory_crust_mantle,b_epsilondev_crust_mantle, &
- b_eps_trace_over_3_crust_mantle,one_minus_sum_beta_crust_mantle, &
- b_alphaval,b_betaval,b_gammaval,factor_common_crust_mantle, &
- size(factor_common_crust_mantle,2), size(factor_common_crust_mantle,3), &
- size(factor_common_crust_mantle,4), size(factor_common_crust_mantle,5) )
- endif
-
- ! Deville routine
- if( USE_DEVILLE_PRODUCTS_VAL ) then
- call compute_forces_inner_core_Dev(minus_gravity_table,density_table,minus_deriv_gravity_table, &
- b_displ_inner_core,b_accel_inner_core, &
- xstore_inner_core,ystore_inner_core,zstore_inner_core, &
- xix_inner_core,xiy_inner_core,xiz_inner_core, &
- etax_inner_core,etay_inner_core,etaz_inner_core, &
- gammax_inner_core,gammay_inner_core,gammaz_inner_core, &
-!----------------------
- is_on_a_slice_edge_inner_core,b_icall, &
- b_accel_crust_mantle,ibool_inner_core,idoubling_inner_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- b_buffer_send_faces,b_buffer_received_faces,npoin2D_max_all_CM_IC, &
- b_buffer_send_chunkcorn_vector,b_buffer_recv_chunkcorn_vector,b_iphase, &
- nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
- npoin2D_cube_from_slices,b_buffer_all_cube_from_slices,b_buffer_slices,ibool_central_cube, &
- receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,INCLUDE_CENTRAL_CUBE,b_iphase_CC, &
-!----------------------
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- kappavstore_inner_core,muvstore_inner_core,ibool_inner_core,idoubling_inner_core, &
- c11store_inner_core,c33store_inner_core,c12store_inner_core, &
- c13store_inner_core,c44store_inner_core, &
- b_R_memory_inner_core,b_epsilondev_inner_core, b_eps_trace_over_3_inner_core,&
- one_minus_sum_beta_inner_core, &
- b_alphaval,b_betaval,b_gammaval, &
- factor_common_inner_core, &
- size(factor_common_inner_core,2), size(factor_common_inner_core,3), &
- size(factor_common_inner_core,4), size(factor_common_inner_core,5) )
- else
- call compute_forces_inner_core(minus_gravity_table,density_table,minus_deriv_gravity_table, &
- b_displ_inner_core,b_accel_inner_core, &
- xstore_inner_core,ystore_inner_core,zstore_inner_core, &
- xix_inner_core,xiy_inner_core,xiz_inner_core, &
- etax_inner_core,etay_inner_core,etaz_inner_core, &
- gammax_inner_core,gammay_inner_core,gammaz_inner_core, &
-!----------------------
- is_on_a_slice_edge_inner_core,b_icall, &
- b_accel_crust_mantle,ibool_inner_core,idoubling_inner_core, &
- myrank,iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- b_buffer_send_faces,b_buffer_received_faces,npoin2D_max_all_CM_IC, &
- b_buffer_send_chunkcorn_vector,b_buffer_recv_chunkcorn_vector,b_iphase, &
- nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
- npoin2D_cube_from_slices,b_buffer_all_cube_from_slices,b_buffer_slices,ibool_central_cube, &
- receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,INCLUDE_CENTRAL_CUBE,b_iphase_CC, &
-!----------------------
- hprime_xx,hprime_yy,hprime_zz,hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- kappavstore_inner_core,muvstore_inner_core,ibool_inner_core,idoubling_inner_core, &
- c11store_inner_core,c33store_inner_core,c12store_inner_core, &
- c13store_inner_core,c44store_inner_core, &
- b_R_memory_inner_core,b_epsilondev_inner_core, b_eps_trace_over_3_inner_core,&
- one_minus_sum_beta_inner_core, &
- b_alphaval,b_betaval,b_gammaval, &
- factor_common_inner_core, &
- size(factor_common_inner_core,2), size(factor_common_inner_core,3), &
- size(factor_common_inner_core,4), size(factor_common_inner_core,5) )
- endif
-
-! assemble all the contributions between slices using MPI
-! crust/mantle and inner core handled in the same call
-! in order to reduce the number of MPI messages by 2
- do while (b_iphase <= 7) ! make sure the last communications are finished and processed
- call assemble_MPI_vector(myrank,b_accel_crust_mantle,b_accel_inner_core, &
- iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- b_buffer_send_faces,b_buffer_received_faces,npoin2D_max_all_CM_IC, &
- b_buffer_send_chunkcorn_vector,b_buffer_recv_chunkcorn_vector, &
- NUMMSGS_FACES,NCORNERSCHUNKS, &
- NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL(IREGION_CRUST_MANTLE), &
- NGLOB1D_RADIAL(IREGION_INNER_CORE),NCHUNKS_VAL,b_iphase)
- enddo
- else
- ! crust/mantle and inner core handled in the same call
- ! in order to reduce the number of MPI messages by 2
- call assemble_MPI_vector_block(myrank, &
- b_accel_crust_mantle,NGLOB_CRUST_MANTLE, &
- b_accel_inner_core,NGLOB_INNER_CORE, &
- iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle, &
- iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
- npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
- iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
- iboolleft_xi_inner_core,iboolright_xi_inner_core, &
- iboolleft_eta_inner_core,iboolright_eta_inner_core, &
- npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
- iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces,imsg_type, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- b_buffer_send_faces,b_buffer_received_faces, &
- b_buffer_send_chunkcorn_vector,b_buffer_recv_chunkcorn_vector, &
- NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
- NPROC_XI_VAL,NPROC_ETA_VAL, &
- NGLOB1D_RADIAL(IREGION_CRUST_MANTLE), &
- NGLOB2DMAX_XMIN_XMAX(IREGION_CRUST_MANTLE),NGLOB2DMAX_YMIN_YMAX(IREGION_CRUST_MANTLE), &
- NGLOB1D_RADIAL(IREGION_INNER_CORE), &
- NGLOB2DMAX_XMIN_XMAX(IREGION_INNER_CORE),NGLOB2DMAX_YMIN_YMAX(IREGION_INNER_CORE), &
- NGLOB2DMAX_XY,NCHUNKS_VAL)
- endif
-
- !---
- !--- use buffers to assemble forces with the central cube
- !---
-
- if(INCLUDE_CENTRAL_CUBE) then
- if(USE_NONBLOCKING_COMMS) then
- do while (b_iphase_CC <= 4) ! make sure the last communications are finished and processed
- call assemble_MPI_central_cube(ichunk,nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
- npoin2D_cube_from_slices,b_buffer_all_cube_from_slices,b_buffer_slices,ibool_central_cube, &
- receiver_cube_from_slices,ibool_inner_core,idoubling_inner_core, &
- ibelm_bottom_inner_core,NSPEC2D_BOTTOM(IREGION_INNER_CORE),b_accel_inner_core,NDIM,b_iphase_CC)
- enddo
- else
- call assemble_MPI_central_cube_block(ichunk,nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
- npoin2D_cube_from_slices,b_buffer_all_cube_from_slices,b_buffer_slices,buffer_slices2,ibool_central_cube, &
- receiver_cube_from_slices,ibool_inner_core,idoubling_inner_core,NSPEC_INNER_CORE, &
- ibelm_bottom_inner_core,NSPEC2D_BOTTOM(IREGION_INNER_CORE),NGLOB_INNER_CORE,b_accel_inner_core,NDIM)
- endif
- endif ! end of assembling forces with the central cube
-
-! ------------------- new non blocking implementation -------------------
-
-#ifdef _HANDOPT
-! way 2:
- if( imodulo_NGLOB_CRUST_MANTLE4 >=1 ) then
- do i=1,imodulo_NGLOB_CRUST_MANTLE4
- b_accel_crust_mantle(1,i) = b_accel_crust_mantle(1,i)*rmass_crust_mantle(i) &
- + b_two_omega_earth*b_veloc_crust_mantle(2,i)
- b_accel_crust_mantle(2,i) = b_accel_crust_mantle(2,i)*rmass_crust_mantle(i) &
- - b_two_omega_earth*b_veloc_crust_mantle(1,i)
- b_accel_crust_mantle(3,i) = b_accel_crust_mantle(3,i)*rmass_crust_mantle(i)
- enddo
- endif
- do i=imodulo_NGLOB_CRUST_MANTLE4+1,NGLOB_CRUST_MANTLE,4
- b_accel_crust_mantle(1,i) = b_accel_crust_mantle(1,i)*rmass_crust_mantle(i) &
- + b_two_omega_earth*b_veloc_crust_mantle(2,i)
- b_accel_crust_mantle(2,i) = b_accel_crust_mantle(2,i)*rmass_crust_mantle(i) &
- - b_two_omega_earth*b_veloc_crust_mantle(1,i)
- b_accel_crust_mantle(3,i) = b_accel_crust_mantle(3,i)*rmass_crust_mantle(i)
-
- b_accel_crust_mantle(1,i+1) = b_accel_crust_mantle(1,i+1)*rmass_crust_mantle(i+1) &
- + b_two_omega_earth*b_veloc_crust_mantle(2,i+1)
- b_accel_crust_mantle(2,i+1) = b_accel_crust_mantle(2,i+1)*rmass_crust_mantle(i+1) &
- - b_two_omega_earth*b_veloc_crust_mantle(1,i+1)
- b_accel_crust_mantle(3,i+1) = b_accel_crust_mantle(3,i+1)*rmass_crust_mantle(i+1)
-
- b_accel_crust_mantle(1,i+2) = b_accel_crust_mantle(1,i+2)*rmass_crust_mantle(i+2) &
- + b_two_omega_earth*b_veloc_crust_mantle(2,i+2)
- b_accel_crust_mantle(2,i+2) = b_accel_crust_mantle(2,i+2)*rmass_crust_mantle(i+2) &
- - b_two_omega_earth*b_veloc_crust_mantle(1,i+2)
- b_accel_crust_mantle(3,i+2) = b_accel_crust_mantle(3,i+2)*rmass_crust_mantle(i+2)
-
- b_accel_crust_mantle(1,i+3) = b_accel_crust_mantle(1,i+3)*rmass_crust_mantle(i+3) &
- + b_two_omega_earth*b_veloc_crust_mantle(2,i+3)
- b_accel_crust_mantle(2,i+3) = b_accel_crust_mantle(2,i+3)*rmass_crust_mantle(i+3) &
- - b_two_omega_earth*b_veloc_crust_mantle(1,i+3)
- b_accel_crust_mantle(3,i+3) = b_accel_crust_mantle(3,i+3)*rmass_crust_mantle(i+3)
- enddo
-#else
-! way 1:
- do i=1,NGLOB_CRUST_MANTLE
- b_accel_crust_mantle(1,i) = b_accel_crust_mantle(1,i)*rmass_crust_mantle(i) &
- + b_two_omega_earth*b_veloc_crust_mantle(2,i)
- b_accel_crust_mantle(2,i) = b_accel_crust_mantle(2,i)*rmass_crust_mantle(i) &
- - b_two_omega_earth*b_veloc_crust_mantle(1,i)
- b_accel_crust_mantle(3,i) = b_accel_crust_mantle(3,i)*rmass_crust_mantle(i)
- enddo
-#endif
-
- endif ! SIMULATION_TYPE == 3
-
- ! couples ocean with crust mantle
- if(OCEANS_VAL) &
- call compute_coupling_ocean(accel_crust_mantle,b_accel_crust_mantle, &
- rmass_crust_mantle,rmass_ocean_load,normal_top_crust_mantle, &
- ibool_crust_mantle,ibelm_top_crust_mantle, &
- updated_dof_ocean_load, &
- SIMULATION_TYPE,NSPEC2D_TOP(IREGION_CRUST_MANTLE))
-
-
-!
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!
- ! Newmark time scheme - corrector for elastic parts
-#ifdef _HANDOPT
-! way 2:
- ! mantle
- if( imodulo_NGLOB_CRUST_MANTLE4 >= 1 ) then
- do i=1,imodulo_NGLOB_CRUST_MANTLE4
- veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) + deltatover2*accel_crust_mantle(:,i)
- enddo
- endif
- do i=imodulo_NGLOB_CRUST_MANTLE4+1,NGLOB_CRUST_MANTLE,4
- veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) + deltatover2*accel_crust_mantle(:,i)
- veloc_crust_mantle(:,i+1) = veloc_crust_mantle(:,i+1) + deltatover2*accel_crust_mantle(:,i+1)
- veloc_crust_mantle(:,i+2) = veloc_crust_mantle(:,i+2) + deltatover2*accel_crust_mantle(:,i+2)
- veloc_crust_mantle(:,i+3) = veloc_crust_mantle(:,i+3) + deltatover2*accel_crust_mantle(:,i+3)
- enddo
-
- ! inner core
- if(imodulo_NGLOB_INNER_CORE >= 1) then
- do i=1,imodulo_NGLOB_INNER_CORE
- accel_inner_core(1,i) = accel_inner_core(1,i)*rmass_inner_core(i) &
- + two_omega_earth*veloc_inner_core(2,i)
- accel_inner_core(2,i) = accel_inner_core(2,i)*rmass_inner_core(i) &
- - two_omega_earth*veloc_inner_core(1,i)
- accel_inner_core(3,i) = accel_inner_core(3,i)*rmass_inner_core(i)
-
- veloc_inner_core(:,i) = veloc_inner_core(:,i) + deltatover2*accel_inner_core(:,i)
- enddo
- endif
- do i=imodulo_NGLOB_INNER_CORE+1,NGLOB_INNER_CORE,3
- accel_inner_core(1,i) = accel_inner_core(1,i)*rmass_inner_core(i) &
- + two_omega_earth*veloc_inner_core(2,i)
- accel_inner_core(2,i) = accel_inner_core(2,i)*rmass_inner_core(i) &
- - two_omega_earth*veloc_inner_core(1,i)
- accel_inner_core(3,i) = accel_inner_core(3,i)*rmass_inner_core(i)
-
- accel_inner_core(1,i+1) = accel_inner_core(1,i+1)*rmass_inner_core(i+1) &
- + two_omega_earth*veloc_inner_core(2,i+1)
- accel_inner_core(2,i+1) = accel_inner_core(2,i+1)*rmass_inner_core(i+1) &
- - two_omega_earth*veloc_inner_core(1,i+1)
- accel_inner_core(3,i+1) = accel_inner_core(3,i+1)*rmass_inner_core(i+1)
-
- accel_inner_core(1,i+2) = accel_inner_core(1,i+2)*rmass_inner_core(i+2) &
- + two_omega_earth*veloc_inner_core(2,i+2)
- accel_inner_core(2,i+2) = accel_inner_core(2,i+2)*rmass_inner_core(i+2) &
- - two_omega_earth*veloc_inner_core(1,i+2)
- accel_inner_core(3,i+2) = accel_inner_core(3,i+2)*rmass_inner_core(i+2)
-
- veloc_inner_core(:,i) = veloc_inner_core(:,i) + deltatover2*accel_inner_core(:,i)
- veloc_inner_core(:,i+1) = veloc_inner_core(:,i+1) + deltatover2*accel_inner_core(:,i+1)
- veloc_inner_core(:,i+2) = veloc_inner_core(:,i+2) + deltatover2*accel_inner_core(:,i+2)
- enddo
-#else
-! way 1:
- ! mantle
- do i=1,NGLOB_CRUST_MANTLE
- veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) + deltatover2*accel_crust_mantle(:,i)
- enddo
- ! inner core
- do i=1,NGLOB_INNER_CORE
- accel_inner_core(1,i) = accel_inner_core(1,i)*rmass_inner_core(i) &
- + two_omega_earth*veloc_inner_core(2,i)
- accel_inner_core(2,i) = accel_inner_core(2,i)*rmass_inner_core(i) &
- - two_omega_earth*veloc_inner_core(1,i)
- accel_inner_core(3,i) = accel_inner_core(3,i)*rmass_inner_core(i)
-
- veloc_inner_core(:,i) = veloc_inner_core(:,i) + deltatover2*accel_inner_core(:,i)
- enddo
-#endif
-
- if (SIMULATION_TYPE == 3) then
-#ifdef _HANDOPT
-! way 2:
- ! mantle
- if( imodulo_NGLOB_CRUST_MANTLE4 >= 1 ) then
- do i=1,imodulo_NGLOB_CRUST_MANTLE4
- b_veloc_crust_mantle(:,i) = b_veloc_crust_mantle(:,i) + b_deltatover2*b_accel_crust_mantle(:,i)
- enddo
- endif
- do i=imodulo_NGLOB_CRUST_MANTLE4+1,NGLOB_CRUST_MANTLE,4
- b_veloc_crust_mantle(:,i) = b_veloc_crust_mantle(:,i) + b_deltatover2*b_accel_crust_mantle(:,i)
- b_veloc_crust_mantle(:,i+1) = b_veloc_crust_mantle(:,i+1) + b_deltatover2*b_accel_crust_mantle(:,i+1)
- b_veloc_crust_mantle(:,i+2) = b_veloc_crust_mantle(:,i+2) + b_deltatover2*b_accel_crust_mantle(:,i+2)
- b_veloc_crust_mantle(:,i+3) = b_veloc_crust_mantle(:,i+3) + b_deltatover2*b_accel_crust_mantle(:,i+3)
- enddo
- ! inner core
- if(imodulo_NGLOB_INNER_CORE >= 1) then
- do i=1,imodulo_NGLOB_INNER_CORE
- b_accel_inner_core(1,i) = b_accel_inner_core(1,i)*rmass_inner_core(i) &
- + b_two_omega_earth*b_veloc_inner_core(2,i)
- b_accel_inner_core(2,i) = b_accel_inner_core(2,i)*rmass_inner_core(i) &
- - b_two_omega_earth*b_veloc_inner_core(1,i)
- b_accel_inner_core(3,i) = b_accel_inner_core(3,i)*rmass_inner_core(i)
-
- b_veloc_inner_core(:,i) = b_veloc_inner_core(:,i) + b_deltatover2*b_accel_inner_core(:,i)
- enddo
- endif
- do i=imodulo_NGLOB_INNER_CORE+1,NGLOB_INNER_CORE,3
- b_accel_inner_core(1,i) = b_accel_inner_core(1,i)*rmass_inner_core(i) &
- + b_two_omega_earth*b_veloc_inner_core(2,i)
- b_accel_inner_core(2,i) = b_accel_inner_core(2,i)*rmass_inner_core(i) &
- - b_two_omega_earth*b_veloc_inner_core(1,i)
- b_accel_inner_core(3,i) = b_accel_inner_core(3,i)*rmass_inner_core(i)
-
- b_accel_inner_core(1,i+1) = b_accel_inner_core(1,i+1)*rmass_inner_core(i+1) &
- + b_two_omega_earth*b_veloc_inner_core(2,i+1)
- b_accel_inner_core(2,i+1) = b_accel_inner_core(2,i+1)*rmass_inner_core(i+1) &
- - b_two_omega_earth*b_veloc_inner_core(1,i+1)
- b_accel_inner_core(3,i+1) = b_accel_inner_core(3,i+1)*rmass_inner_core(i+1)
-
- b_accel_inner_core(1,i+2) = b_accel_inner_core(1,i+2)*rmass_inner_core(i+2) &
- + b_two_omega_earth*b_veloc_inner_core(2,i+2)
- b_accel_inner_core(2,i+2) = b_accel_inner_core(2,i+2)*rmass_inner_core(i+2) &
- - b_two_omega_earth*b_veloc_inner_core(1,i+2)
- b_accel_inner_core(3,i+2) = b_accel_inner_core(3,i+2)*rmass_inner_core(i+2)
-
- b_veloc_inner_core(:,i) = b_veloc_inner_core(:,i) + b_deltatover2*b_accel_inner_core(:,i)
- b_veloc_inner_core(:,i+1) = b_veloc_inner_core(:,i+1) + b_deltatover2*b_accel_inner_core(:,i+1)
- b_veloc_inner_core(:,i+2) = b_veloc_inner_core(:,i+2) + b_deltatover2*b_accel_inner_core(:,i+2)
-
- enddo
-#else
-! way 1:
- ! mantle
- do i=1,NGLOB_CRUST_MANTLE
- b_veloc_crust_mantle(:,i) = b_veloc_crust_mantle(:,i) + b_deltatover2*b_accel_crust_mantle(:,i)
- enddo
- ! inner core
- do i=1,NGLOB_INNER_CORE
- b_accel_inner_core(1,i) = b_accel_inner_core(1,i)*rmass_inner_core(i) &
- + b_two_omega_earth*b_veloc_inner_core(2,i)
- b_accel_inner_core(2,i) = b_accel_inner_core(2,i)*rmass_inner_core(i) &
- - b_two_omega_earth*b_veloc_inner_core(1,i)
- b_accel_inner_core(3,i) = b_accel_inner_core(3,i)*rmass_inner_core(i)
-
- b_veloc_inner_core(:,i) = b_veloc_inner_core(:,i) + b_deltatover2*b_accel_inner_core(:,i)
- enddo
-#endif
-
- endif ! SIMULATION_TYPE == 3
-
-
- ! restores last time snapshot saved for backward/reconstruction of wavefields
- ! note: this is done here after the Newmark time scheme, otherwise the indexing for sources
- ! and adjoint sources will become more complicated
- ! that is, index it for adjoint sources will match index NSTEP - 1 for backward/reconstructed wavefields
- if( SIMULATION_TYPE == 3 .and. it == 1 ) then
- call read_forward_arrays(myrank, &
- b_displ_crust_mantle,b_veloc_crust_mantle,b_accel_crust_mantle, &
- b_displ_inner_core,b_veloc_inner_core,b_accel_inner_core, &
- b_displ_outer_core,b_veloc_outer_core,b_accel_outer_core, &
- b_R_memory_crust_mantle,b_R_memory_inner_core, &
- b_epsilondev_crust_mantle,b_epsilondev_inner_core, &
- b_A_array_rotation,b_B_array_rotation,LOCAL_PATH)
- endif
-
-! write the seismograms with time shift
-
-! store the seismograms only if there is at least one receiver located in this slice
- if (nrec_local > 0) then
- if (SIMULATION_TYPE == 1) then
- call compute_seismograms(nrec_local,nrec,displ_crust_mantle, &
- nu,hxir_store,hetar_store,hgammar_store, &
- scale_displ,ibool_crust_mantle, &
- ispec_selected_rec,number_receiver_global, &
- seismo_current,NTSTEP_BETWEEN_OUTPUT_SEISMOS, &
- seismograms)
-
- else if (SIMULATION_TYPE == 2) then
- call compute_seismograms_adjoint(NSOURCES,nrec_local,displ_crust_mantle, &
- eps_trace_over_3_crust_mantle,epsilondev_crust_mantle, &
- nu_source,Mxx,Myy,Mzz,Mxy,Mxz,Myz, &
- hxir_store,hetar_store,hgammar_store, &
- hpxir_store,hpetar_store,hpgammar_store, &
- tshift_cmt,hdur_gaussian,DT,t0,scale_displ, &
- hprime_xx,hprime_yy,hprime_zz, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
- moment_der,sloc_der,stshift_der,shdur_der, &
- NTSTEP_BETWEEN_OUTPUT_SEISMOS,seismograms,deltat, &
- ibool_crust_mantle,ispec_selected_source,number_receiver_global, &
- NSTEP,it,nit_written)
-
- else if (SIMULATION_TYPE == 3) then
- call compute_seismograms_backward(nrec_local,nrec,b_displ_crust_mantle, &
- nu,hxir_store,hetar_store,hgammar_store, &
- scale_displ,ibool_crust_mantle, &
- ispec_selected_rec,number_receiver_global, &
- seismo_current,NTSTEP_BETWEEN_OUTPUT_SEISMOS, &
- seismograms)
-
- endif
- endif ! nrec_local
-
- ! write the current or final seismograms
- if(seismo_current == NTSTEP_BETWEEN_OUTPUT_SEISMOS .or. it == it_end) then
- if (SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) then
- call write_seismograms(myrank,seismograms,number_receiver_global,station_name, &
- network_name,stlat,stlon,stele,stbur, &
- nrec,nrec_local,ANGULAR_WIDTH_XI_IN_DEGREES,NEX_XI,DT,t0,it_end, &
- yr_SAC,jda_SAC,ho_SAC,mi_SAC,sec_SAC,t_cmt_SAC,t_shift_SAC, &
- elat_SAC,elon_SAC,depth_SAC,event_name_SAC,cmt_lat_SAC,cmt_lon_SAC,&
- cmt_depth_SAC,cmt_hdur_SAC,NPROCTOT_VAL, &
- OUTPUT_SEISMOS_ASCII_TEXT,OUTPUT_SEISMOS_SAC_ALPHANUM, &
- OUTPUT_SEISMOS_SAC_BINARY,ROTATE_SEISMOGRAMS_RT,NTSTEP_BETWEEN_OUTPUT_SEISMOS, &
- seismo_offset,seismo_current,WRITE_SEISMOGRAMS_BY_MASTER, &
- SAVE_ALL_SEISMOS_IN_ONE_FILE,USE_BINARY_FOR_LARGE_FILE,MODEL)
- if(myrank==0) then
- write(IMAIN,*)
- write(IMAIN,*) ' Total number of time steps written: ', it-it_begin+1
- write(IMAIN,*)
- endif
- else
- if( nrec_local > 0 ) &
- call write_adj_seismograms(seismograms,number_receiver_global, &
- nrec_local,it,nit_written,DT, &
- NSTEP,NTSTEP_BETWEEN_OUTPUT_SEISMOS,t0,LOCAL_PATH)
- nit_written = it
- endif
- seismo_offset = seismo_offset + seismo_current
- seismo_current = 0
- endif
-
-!
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!
-
-! kernel calculations
- if (SIMULATION_TYPE == 3) then
- ! crust mantle
- call compute_kernels_crust_mantle(ibool_crust_mantle, &
- rho_kl_crust_mantle,beta_kl_crust_mantle, &
- alpha_kl_crust_mantle,cijkl_kl_crust_mantle, &
- accel_crust_mantle,b_displ_crust_mantle, &
- epsilondev_crust_mantle,b_epsilondev_crust_mantle, &
- eps_trace_over_3_crust_mantle,b_eps_trace_over_3_crust_mantle, &
- deltat)
-
- ! outer core
- call compute_kernels_outer_core(ibool_outer_core, &
- xix_outer_core,xiy_outer_core,xiz_outer_core, &
- etax_outer_core,etay_outer_core,etaz_outer_core, &
- gammax_outer_core,gammay_outer_core,gammaz_outer_core, &
- hprime_xx,hprime_yy,hprime_zz, &
- displ_outer_core,accel_outer_core, &
- b_displ_outer_core,b_accel_outer_core, &
- vector_accel_outer_core,vector_displ_outer_core, &
- b_vector_displ_outer_core, &
- div_displ_outer_core,b_div_displ_outer_core, &
- rhostore_outer_core,kappavstore_outer_core, &
- rho_kl_outer_core,alpha_kl_outer_core, &
- deviatoric_outercore,nspec_beta_kl_outer_core,beta_kl_outer_core, &
- deltat)
-
- ! inner core
- call compute_kernels_inner_core(ibool_inner_core, &
- rho_kl_inner_core,beta_kl_inner_core, &
- alpha_kl_inner_core, &
- accel_inner_core,b_displ_inner_core, &
- epsilondev_inner_core,b_epsilondev_inner_core, &
- eps_trace_over_3_inner_core,b_eps_trace_over_3_inner_core, &
- deltat)
-
-!<YANGL
- ! NOISE TOMOGRAPHY --- source strength kernel
- if (NOISE_TOMOGRAPHY == 3) &
- call compute_kernels_strength_noise(nmovie_points,ibool_crust_mantle, &
- Sigma_kl_crust_mantle,displ_crust_mantle,deltat,it, &
- normal_x_noise,normal_y_noise,normal_z_noise, &
- NSPEC2D_TOP(IREGION_CRUST_MANTLE),noise_surface_movie, &
- ibelm_top_crust_mantle)
-!>YANGL
-
- ! --- boundary kernels ------
- if (SAVE_BOUNDARY_MESH) then
- fluid_solid_boundary = .false.
- iregion_code = IREGION_CRUST_MANTLE
-
- ! Moho
- if (.not. SUPPRESS_CRUSTAL_MESH .and. HONOR_1D_SPHERICAL_MOHO) then
- call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
- b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
- ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- ! -- idoubling_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
- kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
- c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
- c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
- c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
- c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- k_top,ibelm_moho_top,normal_moho,moho_kl_top,fluid_solid_boundary,NSPEC2D_MOHO)
-
- call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
- b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
- ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- ! --idoubling_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
- kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
- c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
- c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
- c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
- c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- k_bot,ibelm_moho_bot,normal_moho,moho_kl_bot,fluid_solid_boundary,NSPEC2D_MOHO)
-
- moho_kl = moho_kl + (moho_kl_top - moho_kl_bot) * deltat
- endif
-
- ! 400
- call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
- b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
- ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- ! --idoubling_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
- kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
- c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
- c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
- c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
- c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- k_top,ibelm_400_top,normal_400,d400_kl_top,fluid_solid_boundary,NSPEC2D_400)
-
- call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
- b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
- ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- ! --idoubling_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
- kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
- c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
- c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
- c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
- c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- k_bot,ibelm_400_bot,normal_400,d400_kl_bot,fluid_solid_boundary,NSPEC2D_400)
-
- d400_kl = d400_kl + (d400_kl_top - d400_kl_bot) * deltat
-
- ! 670
- call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
- b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
- ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- ! --idoubling_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
- kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
- c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
- c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
- c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
- c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- k_top,ibelm_670_top,normal_670,d670_kl_top,fluid_solid_boundary,NSPEC2D_670)
-
- call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
- b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
- ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- ! --idoubling_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
- kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
- c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
- c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
- c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
- c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- k_bot,ibelm_670_bot,normal_670,d670_kl_bot,fluid_solid_boundary,NSPEC2D_670)
-
- d670_kl = d670_kl + (d670_kl_top - d670_kl_bot) * deltat
-
- ! CMB
- fluid_solid_boundary = .true.
- iregion_code = IREGION_CRUST_MANTLE
- call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
- b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
- ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- ! -- idoubling_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_crust_mantle,kappavstore_crust_mantle, muvstore_crust_mantle, &
- kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
- c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
- c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
- c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
- c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- k_top,ibelm_bottom_crust_mantle,normal_top_outer_core, &
- cmb_kl_top,fluid_solid_boundary,NSPEC2D_CMB)
-
- iregion_code = IREGION_OUTER_CORE
- call compute_boundary_kernel(vector_displ_outer_core,vector_accel_outer_core, &
- b_vector_displ_outer_core,nspec_outer_core, &
- iregion_code,ystore_outer_core,zstore_outer_core,ibool_outer_core,ispec_is_tiso_outer_core, &
- ! --idoubling_outer_core, &
- xix_outer_core,xiy_outer_core,xiz_outer_core, &
- etax_outer_core,etay_outer_core,etaz_outer_core,&
- gammax_outer_core,gammay_outer_core,gammaz_outer_core,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_outer_core,kappavstore_outer_core,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- k_bot,ibelm_top_outer_core,normal_top_outer_core, &
- cmb_kl_bot,fluid_solid_boundary,NSPEC2D_CMB)
-
- cmb_kl = cmb_kl + (cmb_kl_top - cmb_kl_bot) * deltat
-
- ! ICB
- fluid_solid_boundary = .true.
- call compute_boundary_kernel(vector_displ_outer_core,vector_accel_outer_core, &
- b_vector_displ_outer_core,nspec_outer_core, &
- iregion_code,ystore_outer_core,zstore_outer_core,ibool_outer_core,ispec_is_tiso_outer_core, &
- ! --idoubling_outer_core, &
- xix_outer_core,xiy_outer_core,xiz_outer_core, &
- etax_outer_core,etay_outer_core,etaz_outer_core,&
- gammax_outer_core,gammay_outer_core,gammaz_outer_core,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_outer_core,kappavstore_outer_core,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- k_top,ibelm_bottom_outer_core,normal_bottom_outer_core, &
- icb_kl_top,fluid_solid_boundary,NSPEC2D_ICB)
-
- iregion_code = IREGION_INNER_CORE
- call compute_boundary_kernel(displ_inner_core,accel_inner_core, &
- b_displ_inner_core,nspec_inner_core,iregion_code, &
- ystore_inner_core,zstore_inner_core,ibool_inner_core,ispec_is_tiso_inner_core, &
- ! -- idoubling_inner_core, &
- xix_inner_core,xiy_inner_core,xiz_inner_core, &
- etax_inner_core,etay_inner_core,etaz_inner_core,&
- gammax_inner_core,gammay_inner_core,gammaz_inner_core,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_inner_core,kappavstore_inner_core,muvstore_inner_core, &
- dummy_array,dummy_array,dummy_array, &
- c11store_inner_core,c12store_inner_core,c13store_inner_core,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array,dummy_array, &
- c33store_inner_core,dummy_array,dummy_array, &
- dummy_array,c44store_inner_core,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- k_bot,ibelm_top_inner_core,normal_bottom_outer_core, &
- icb_kl_bot,fluid_solid_boundary,NSPEC2D_ICB)
-
- icb_kl = icb_kl + (icb_kl_top - icb_kl_bot) * deltat
- endif
-
- ! approximate hessian
- if( APPROXIMATE_HESS_KL ) then
- call compute_kernels_hessian(ibool_crust_mantle, &
- hess_kl_crust_mantle,&
- accel_crust_mantle,b_accel_crust_mantle, &
- deltat)
- endif
-
- endif ! end computing kernels
-
-!
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!
-
-!<YANGL
- ! first step of noise tomography, i.e., save a surface movie at every time step
- ! modified from the subroutine 'write_movie_surface'
- if ( NOISE_TOMOGRAPHY == 1 ) then
- call noise_save_surface_movie(displ_crust_mantle, &
- ibelm_top_crust_mantle,ibool_crust_mantle, &
- NSPEC2D_TOP(IREGION_CRUST_MANTLE),noise_surface_movie,it)
- endif
-!>YANGL
-
- ! save movie on surface
- if( MOVIE_SURFACE ) then
- if( mod(it,NTSTEP_BETWEEN_FRAMES) == 0) then
- ! save velocity here to avoid static offset on displacement for movies
- call write_movie_surface(myrank,nmovie_points,scale_veloc,veloc_crust_mantle, &
- scale_displ,displ_crust_mantle, &
- xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
- store_val_x,store_val_y,store_val_z, &
- store_val_x_all,store_val_y_all,store_val_z_all, &
- store_val_ux,store_val_uy,store_val_uz, &
- store_val_ux_all,store_val_uy_all,store_val_uz_all, &
- ibelm_top_crust_mantle,ibool_crust_mantle, &
- NSPEC2D_TOP(IREGION_CRUST_MANTLE), &
- NIT,it,OUTPUT_FILES,MOVIE_VOLUME_TYPE)
- endif
- endif
-
-
- ! save movie in full 3D mesh
- if(MOVIE_VOLUME ) then
- if( mod(it-MOVIE_START,NTSTEP_BETWEEN_FRAMES) == 0 &
- .and. it >= MOVIE_START .and. it <= MOVIE_STOP) then
-
- if (MOVIE_VOLUME_TYPE == 1) then ! output strains
-
- call write_movie_volume_strains(myrank,npoints_3dmovie, &
- LOCAL_PATH,MOVIE_VOLUME_TYPE,MOVIE_COARSE, &
- it,eps_trace_over_3_crust_mantle,epsilondev_crust_mantle, &
- muvstore_crust_mantle_3dmovie, &
- mask_3dmovie,nu_3dmovie)
-
- else if (MOVIE_VOLUME_TYPE == 2 .or. MOVIE_VOLUME_TYPE == 3) then
- ! output the Time Integral of Strain, or \mu*TIS
- call write_movie_volume_strains(myrank,npoints_3dmovie, &
- LOCAL_PATH,MOVIE_VOLUME_TYPE,MOVIE_COARSE, &
- it,Ieps_trace_over_3_crust_mantle,Iepsilondev_crust_mantle, &
- muvstore_crust_mantle_3dmovie, &
- mask_3dmovie,nu_3dmovie)
-
- else if (MOVIE_VOLUME_TYPE == 4) then ! output divergence and curl in whole volume
-
- call write_movie_volume_divcurl(myrank,it,eps_trace_over_3_crust_mantle,&
- div_displ_outer_core, &
- accel_outer_core,kappavstore_outer_core,rhostore_outer_core,ibool_outer_core, &
- eps_trace_over_3_inner_core, &
- epsilondev_crust_mantle,epsilondev_inner_core, &
- LOCAL_PATH, &
- displ_crust_mantle,displ_inner_core,displ_outer_core, &
- veloc_crust_mantle,veloc_inner_core,veloc_outer_core, &
- accel_crust_mantle,accel_inner_core, &
- ibool_crust_mantle,ibool_inner_core)
-
- else if (MOVIE_VOLUME_TYPE == 5) then !output displacement
- scalingval = scale_displ
- call write_movie_volume_vector(myrank,it,npoints_3dmovie, &
- LOCAL_PATH,MOVIE_VOLUME_TYPE, &
- MOVIE_COARSE,ibool_crust_mantle,displ_crust_mantle, &
- scalingval,mask_3dmovie,nu_3dmovie)
-
- else if (MOVIE_VOLUME_TYPE == 6) then !output velocity
- scalingval = scale_veloc
- call write_movie_volume_vector(myrank,it,npoints_3dmovie, &
- LOCAL_PATH,MOVIE_VOLUME_TYPE, &
- MOVIE_COARSE,ibool_crust_mantle,veloc_crust_mantle, &
- scalingval,mask_3dmovie,nu_3dmovie)
-
- else
-
- call exit_MPI(myrank, 'MOVIE_VOLUME_TYPE has to be 1,2,3,4,5 or 6')
-
- endif ! MOVIE_VOLUME_TYPE
- endif
- endif ! MOVIE_VOLUME
-
-!daniel: debugging
-! if( SNAPSHOT_INNER_CORE .and. mod(it-MOVIE_START,NTSTEP_BETWEEN_FRAMES) == 0 &
-! .and. it >= MOVIE_START .and. it <= MOVIE_STOP) then
-! ! VTK file output
-! ! displacement values
-! !write(prname,'(a,i6.6,a)') trim(LOCAL_PATH)//'/'//'proc',myrank,'_'
-! !write(filename,'(a,a,i6.6)') prname(1:len_trim(prname)),'reg_3_displ_',it
-! !call write_VTK_data_cr(idoubling_inner_core,NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
-! ! xstore_inner_core,ystore_inner_core,zstore_inner_core,ibool_inner_core, &
-! ! displ_inner_core,filename)
-!
-! write(prname,'(a)') 'OUTPUT_FILES/snapshot_all_'
-! write(filename,'(a,a,i6.6)') prname(1:len_trim(prname)),'reg_3_displ_',it
-! call write_VTK_data_cr_all(myrank,idoubling_inner_core, &
-! NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
-! xstore_inner_core,ystore_inner_core,zstore_inner_core,ibool_inner_core, &
-! displ_inner_core,filename)
-!
-! endif
-
-
-!---- end of time iteration loop
-!
- enddo ! end of main time loop
-!
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!
-
- ! synchronize all processes, waits until all processes have written their seismograms
- call MPI_BARRIER(MPI_COMM_WORLD,ier)
- if( ier /= 0 ) call exit_mpi(myrank,'error synchronize after time loop')
-
- ! closes Stacey absorbing boundary snapshots
- if( ABSORBING_CONDITIONS ) then
- ! crust mantle
- if (nspec2D_xmin_crust_mantle > 0 .and. (SIMULATION_TYPE == 3 &
- .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
- call close_file_abs(0)
- endif
-
- if (nspec2D_xmax_crust_mantle > 0 .and. (SIMULATION_TYPE == 3 &
- .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
- call close_file_abs(1)
- endif
-
- if (nspec2D_ymin_crust_mantle > 0 .and. (SIMULATION_TYPE == 3 &
- .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
- call close_file_abs(2)
- endif
-
- if (nspec2D_ymax_crust_mantle > 0 .and. (SIMULATION_TYPE == 3 &
- .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
- call close_file_abs(3)
- endif
-
- ! outer core
- if (nspec2D_xmin_outer_core > 0 .and. (SIMULATION_TYPE == 3 &
- .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
- call close_file_abs(4)
- endif
-
- if (nspec2D_xmax_outer_core > 0 .and. (SIMULATION_TYPE == 3 &
- .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
- call close_file_abs(5)
- endif
-
- if (nspec2D_ymin_outer_core > 0 .and. (SIMULATION_TYPE == 3 &
- .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
- call close_file_abs(6)
- endif
-
- if (nspec2D_ymax_outer_core > 0 .and. (SIMULATION_TYPE == 3 &
- .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
- call close_file_abs(7)
- endif
-
- if (NSPEC2D_BOTTOM(IREGION_OUTER_CORE) > 0 .and. (SIMULATION_TYPE == 3 &
- .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
- call close_file_abs(8)
- endif
-
- ! frees memory
- deallocate(absorb_xmin_crust_mantle5, &
- absorb_xmax_crust_mantle5, &
- absorb_ymin_crust_mantle5, &
- absorb_ymax_crust_mantle5, &
- absorb_xmin_outer_core, &
- absorb_xmax_outer_core, &
- absorb_ymin_outer_core, &
- absorb_ymax_outer_core, &
- absorb_zmin_outer_core)
- endif
-
- ! save/read the surface movie using the same c routine as we do for absorbing boundaries (file ID is 9)
- if (NOISE_TOMOGRAPHY/=0) then
- call close_file_abs(9)
- endif
-
-
- ! synchronize all processes
- call MPI_BARRIER(MPI_COMM_WORLD,ier)
- if( ier /= 0 ) call exit_mpi(myrank,'error synchronize closing snapshots')
-
- ! save files to local disk or tape system if restart file
- call save_forward_arrays(myrank,SIMULATION_TYPE,SAVE_FORWARD, &
- NUMBER_OF_RUNS,NUMBER_OF_THIS_RUN, &
- displ_crust_mantle,veloc_crust_mantle,accel_crust_mantle, &
- displ_inner_core,veloc_inner_core,accel_inner_core, &
- displ_outer_core,veloc_outer_core,accel_outer_core, &
- R_memory_crust_mantle,R_memory_inner_core, &
- epsilondev_crust_mantle,epsilondev_inner_core, &
- A_array_rotation,B_array_rotation, &
- LOCAL_PATH)
-
- ! synchronize all processes
- call MPI_BARRIER(MPI_COMM_WORLD,ier)
- if( ier /= 0 ) call exit_mpi(myrank,'error synchronize saving forward')
-
- ! dump kernel arrays
- if (SIMULATION_TYPE == 3) then
- ! crust mantle
- call save_kernels_crust_mantle(myrank,scale_t,scale_displ, &
- cijkl_kl_crust_mantle,rho_kl_crust_mantle, &
- alpha_kl_crust_mantle,beta_kl_crust_mantle, &
- ystore_crust_mantle,zstore_crust_mantle, &
- rhostore_crust_mantle,muvstore_crust_mantle, &
- kappavstore_crust_mantle,ibool_crust_mantle, &
- kappahstore_crust_mantle,muhstore_crust_mantle, &
- eta_anisostore_crust_mantle,ispec_is_tiso_crust_mantle, &
- ! --idoubling_crust_mantle, &
- LOCAL_PATH)
-
-!<YANGL
- ! noise strength kernel
- if (NOISE_TOMOGRAPHY == 3) then
- call save_kernels_strength_noise(myrank,LOCAL_PATH,Sigma_kl_crust_mantle)
- endif
-!>YANGL
-
- ! outer core
- call save_kernels_outer_core(myrank,scale_t,scale_displ, &
- rho_kl_outer_core,alpha_kl_outer_core, &
- rhostore_outer_core,kappavstore_outer_core, &
- deviatoric_outercore,nspec_beta_kl_outer_core,beta_kl_outer_core, &
- LOCAL_PATH)
-
- ! inner core
- call save_kernels_inner_core(myrank,scale_t,scale_displ, &
- rho_kl_inner_core,beta_kl_inner_core,alpha_kl_inner_core, &
- rhostore_inner_core,muvstore_inner_core,kappavstore_inner_core, &
- LOCAL_PATH)
-
- ! boundary kernel
- if (SAVE_BOUNDARY_MESH) then
- call save_kernels_boundary_kl(myrank,scale_t,scale_displ, &
- moho_kl,d400_kl,d670_kl,cmb_kl,icb_kl, &
- LOCAL_PATH,HONOR_1D_SPHERICAL_MOHO)
- endif
-
- ! approximate hessian
- if( APPROXIMATE_HESS_KL ) then
- call save_kernels_hessian(myrank,scale_t,scale_displ, &
- hess_kl_crust_mantle,LOCAL_PATH)
- endif
- endif
-
- ! save source derivatives for adjoint simulations
- if (SIMULATION_TYPE == 2 .and. nrec_local > 0) then
- call save_kernels_source_derivatives(nrec_local,NSOURCES,scale_displ,scale_t, &
- nu_source,moment_der,sloc_der,stshift_der,shdur_der,number_receiver_global)
- endif
-
- ! frees dynamically allocated memory
- ! mpi buffers
- deallocate(buffer_send_faces, &
- buffer_received_faces, &
- b_buffer_send_faces, &
- b_buffer_received_faces)
-
- ! central cube buffers
- deallocate(sender_from_slices_to_cube, &
- buffer_all_cube_from_slices, &
- b_buffer_all_cube_from_slices, &
- buffer_slices, &
- b_buffer_slices, &
- buffer_slices2, &
- ibool_central_cube)
-
- ! sources
- deallocate(islice_selected_source, &
- ispec_selected_source, &
- Mxx, &
- Myy, &
- Mzz, &
- Mxy, &
- Mxz, &
- Myz, &
- xi_source, &
- eta_source, &
- gamma_source, &
- tshift_cmt, &
- hdur, &
- hdur_gaussian, &
- theta_source, &
- phi_source, &
- nu_source)
- if (SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) deallocate(sourcearrays)
- if (SIMULATION_TYPE == 2 .or. SIMULATION_TYPE == 3) then
- deallocate(iadj_vec)
- if(nadj_rec_local > 0) then
- deallocate(adj_sourcearrays)
- deallocate(iadjsrc,iadjsrc_len)
- endif
- endif
-
- ! receivers
- deallocate(islice_selected_rec, &
- ispec_selected_rec, &
- xi_receiver, &
- eta_receiver, &
- gamma_receiver, &
- station_name, &
- network_name, &
- stlat, &
- stlon, &
- stele, &
- stbur, &
- nu, &
- number_receiver_global)
- if( nrec_local > 0 ) then
- deallocate(hxir_store, &
- hetar_store, &
- hgammar_store)
- if( SIMULATION_TYPE == 2 ) then
- deallocate(moment_der,stshift_der)
- endif
- endif
- deallocate(seismograms)
-
- if (SIMULATION_TYPE == 3) then
- if( APPROXIMATE_HESS_KL ) then
- deallocate(hess_kl_crust_mantle)
- endif
- deallocate(beta_kl_outer_core)
- endif
-
- ! movies
- if(MOVIE_SURFACE .or. NOISE_TOMOGRAPHY /= 0 ) then
- deallocate(store_val_x, &
- store_val_y, &
- store_val_z, &
- store_val_ux, &
- store_val_uy, &
- store_val_uz)
- if (MOVIE_SURFACE) then
- deallocate(store_val_x_all, &
- store_val_y_all, &
- store_val_z_all, &
- store_val_ux_all, &
- store_val_uy_all, &
- store_val_uz_all)
- endif
- endif
- if(MOVIE_VOLUME) then
- deallocate(nu_3dmovie)
- endif
-
- ! noise simulations
- if ( NOISE_TOMOGRAPHY /= 0 ) then
- deallocate(noise_sourcearray, &
- normal_x_noise, &
- normal_y_noise, &
- normal_z_noise, &
- mask_noise, &
- noise_surface_movie)
- endif
-
- ! close the main output file
- if(myrank == 0) then
- write(IMAIN,*)
- write(IMAIN,*) 'End of the simulation'
- write(IMAIN,*)
- close(IMAIN)
- endif
-
- ! synchronize all the processes to make sure everybody has finished
- call MPI_BARRIER(MPI_COMM_WORLD,ier)
- if( ier /= 0 ) call exit_mpi(myrank,'error synchronize finishing simulation')
-
! stop all the MPI processes, and exit
call MPI_FINALIZE(ier)
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90 (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -0,0 +1,827 @@
+!=====================================================================
+!
+! S p e c f e m 3 D G l o b e V e r s i o n 5 . 1
+! --------------------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA
+! and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+!
+! United States and French Government Sponsorship Acknowledged.
+
+module constants
+
+ include "constants.h"
+
+ ! include values created by the mesher
+ ! done for performance only using static allocation to allow for loop unrolling
+ include "OUTPUT_FILES/values_from_mesher.h"
+
+end module constants
+
+!=====================================================================
+
+module specfem_par
+
+! main parameter module for specfem simulations
+
+ use constants
+
+ implicit none
+
+ !-----------------------------------------------------------------
+ ! attenuation parameters
+ !-----------------------------------------------------------------
+
+ ! memory variables and standard linear solids for attenuation
+ real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval, betaval, gammaval
+ ! ADJOINT
+ real(kind=CUSTOM_REAL), dimension(N_SLS) :: b_alphaval, b_betaval, b_gammaval
+
+ !-----------------------------------------------------------------
+ ! topography/bathymetry & oceans
+ !-----------------------------------------------------------------
+
+ ! use integer array to store values
+ integer, dimension(NX_BATHY,NY_BATHY) :: ibathy_topo
+
+ ! additional mass matrix for ocean load
+ real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE_OCEANS) :: rmass_ocean_load
+
+ ! flag to mask ocean-bottom degrees of freedom for ocean load
+ logical, dimension(NGLOB_CRUST_MANTLE_OCEANS) :: updated_dof_ocean_load
+
+ !-----------------------------------------------------------------
+ ! ellipticity
+ !-----------------------------------------------------------------
+
+ ! for ellipticity
+ integer :: nspl
+ double precision :: rspl(NR),espl(NR),espl2(NR)
+
+ !-----------------------------------------------------------------
+ ! rotation
+ !-----------------------------------------------------------------
+
+ ! non-dimensionalized rotation rate of the Earth times two
+ real(kind=CUSTOM_REAL) :: two_omega_earth
+
+ ! for the Euler scheme for rotation
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ROTATION) :: &
+ A_array_rotation,B_array_rotation
+
+ !ADJOINT
+ real(kind=CUSTOM_REAL) b_two_omega_earth
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ROT_ADJOINT) :: &
+ b_A_array_rotation,b_B_array_rotation
+
+ !-----------------------------------------------------------------
+ ! gravity
+ !-----------------------------------------------------------------
+
+ ! lookup table every km for gravity
+ real(kind=CUSTOM_REAL) :: minus_g_cmb,minus_g_icb
+ double precision, dimension(NRAD_GRAVITY) :: minus_gravity_table, &
+ minus_deriv_gravity_table,density_table,d_ln_density_dr_table,minus_rho_g_over_kappa_fluid
+
+ !-----------------------------------------------------------------
+ ! time scheme
+ !-----------------------------------------------------------------
+
+ integer :: it
+
+ ! Newmark time scheme parameters and non-dimensionalization
+ double precision :: scale_t,scale_t_inv,scale_displ,scale_veloc
+ real(kind=CUSTOM_REAL) :: deltat,deltatover2,deltatsqover2
+ ! ADJOINT
+ real(kind=CUSTOM_REAL) :: b_deltat,b_deltatover2,b_deltatsqover2
+
+#ifdef _HANDOPT
+ integer :: imodulo_NGLOB_CRUST_MANTLE,imodulo_NGLOB_CRUST_MANTLE4, &
+ imodulo_NGLOB_INNER_CORE,imodulo_NGLOB_OUTER_CORE
+#endif
+
+ !-----------------------------------------------------------------
+ ! sources
+ !-----------------------------------------------------------------
+
+ ! parameters for the source
+ integer :: NSOURCES,nsources_local
+ integer, dimension(:), allocatable :: islice_selected_source,ispec_selected_source
+ real(kind=CUSTOM_REAL), dimension(:,:,:,:,:), allocatable :: sourcearrays
+ double precision, dimension(:,:,:) ,allocatable:: nu_source
+
+ double precision, dimension(:), allocatable :: Mxx,Myy,Mzz,Mxy,Mxz,Myz
+ double precision, dimension(:), allocatable :: xi_source,eta_source,gamma_source
+ double precision, dimension(:), allocatable :: tshift_cmt,hdur,hdur_gaussian
+ double precision, dimension(:), allocatable :: theta_source,phi_source
+ double precision, external :: comp_source_time_function
+ double precision :: t0
+
+ !-----------------------------------------------------------------
+ ! receivers
+ !-----------------------------------------------------------------
+
+ ! receiver information
+ integer :: nrec,nrec_local
+ integer, dimension(:), allocatable :: islice_selected_rec,ispec_selected_rec
+ integer, dimension(:), allocatable :: number_receiver_global
+ double precision, dimension(:), allocatable :: xi_receiver,eta_receiver,gamma_receiver
+ double precision, dimension(:,:,:), allocatable :: nu
+ double precision, allocatable, dimension(:) :: stlat,stlon,stele,stbur
+ character(len=MAX_LENGTH_STATION_NAME), dimension(:), allocatable :: station_name
+ character(len=MAX_LENGTH_NETWORK_NAME), dimension(:), allocatable :: network_name
+ character(len=150) :: STATIONS,rec_filename
+
+ ! Lagrange interpolators at receivers
+ double precision, dimension(:,:), allocatable :: hxir_store,hetar_store,hgammar_store
+
+ !ADJOINT
+ real(kind=CUSTOM_REAL), dimension(:,:,:,:,:,:), allocatable :: adj_sourcearrays
+ integer :: nrec_simulation, nadj_rec_local
+ integer :: NSTEP_SUB_ADJ ! to read input in chunks
+ integer, dimension(:,:), allocatable :: iadjsrc ! to read input in chunks
+ integer, dimension(:), allocatable :: iadjsrc_len,iadj_vec
+ ! source frechet derivatives
+ real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: moment_der
+ real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: sloc_der
+ real(kind=CUSTOM_REAL), dimension(:), allocatable :: stshift_der, shdur_der
+ double precision, dimension(:,:), allocatable :: hpxir_store,hpetar_store,hpgammar_store
+ integer :: nadj_hprec_local
+
+ !-----------------------------------------------------------------
+ ! seismograms
+ !-----------------------------------------------------------------
+
+ ! seismograms
+ integer :: it_begin,it_end,nit_written
+ real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: seismograms
+ integer :: seismo_offset, seismo_current
+
+ ! for SAC headers for seismograms
+ integer :: yr_SAC,jda_SAC,ho_SAC,mi_SAC
+ real :: mb_SAC
+ double precision :: t_cmt_SAC,t_shift_SAC,elat_SAC,elon_SAC,depth_SAC, &
+ cmt_lat_SAC,cmt_lon_SAC,cmt_depth_SAC,cmt_hdur_SAC,sec_SAC
+ character(len=20) :: event_name_SAC
+
+
+ !-----------------------------------------------------------------
+ ! GLL points & weights
+ !-----------------------------------------------------------------
+
+ ! Gauss-Lobatto-Legendre points of integration and weights
+ double precision, dimension(NGLLX) :: xigll,wxgll
+ double precision, dimension(NGLLY) :: yigll,wygll
+ double precision, dimension(NGLLZ) :: zigll,wzgll
+
+ ! product of weights for gravity term
+ double precision, dimension(NGLLX,NGLLY,NGLLZ) :: wgll_cube
+
+ ! array with derivatives of Lagrange polynomials and precalculated products
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprimewgll_xx
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xxT,hprimewgll_xxT
+ real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLY) :: hprime_yy,hprimewgll_yy
+ real(kind=CUSTOM_REAL), dimension(NGLLZ,NGLLZ) :: hprime_zz,hprimewgll_zz
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
+ real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
+
+
+ !-----------------------------------------------------------------
+ ! file parameters
+ !-----------------------------------------------------------------
+
+ ! parameters read from parameter file
+ integer MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD,NER_CRUST, &
+ NER_80_MOHO,NER_220_80,NER_400_220,NER_600_400,NER_670_600,NER_771_670, &
+ NER_TOPDDOUBLEPRIME_771,NER_CMB_TOPDDOUBLEPRIME,NER_OUTER_CORE, &
+ NER_TOP_CENTRAL_CUBE_ICB,NEX_XI,NEX_ETA, &
+ NTSTEP_BETWEEN_OUTPUT_SEISMOS,&
+ NTSTEP_BETWEEN_READ_ADJSRC,NSTEP,NTSTEP_BETWEEN_FRAMES, &
+ NTSTEP_BETWEEN_OUTPUT_INFO,NUMBER_OF_RUNS,NUMBER_OF_THIS_RUN,SIMULATION_TYPE, &
+ MOVIE_VOLUME_TYPE,MOVIE_START,MOVIE_STOP,NOISE_TOMOGRAPHY
+
+ double precision DT,ROCEAN,RMIDDLE_CRUST, &
+ RMOHO,R80,R220,R400,R600,R670,R771,RTOPDDOUBLEPRIME,RCMB,RICB, &
+ RHO_TOP_OC,RHO_BOTTOM_OC,RHO_OCEANS,HDUR_MOVIE, &
+ MOVIE_TOP,MOVIE_BOTTOM,MOVIE_WEST,MOVIE_EAST,MOVIE_NORTH,MOVIE_SOUTH, &
+ ANGULAR_WIDTH_XI_IN_DEGREES
+
+ logical ONE_CRUST,TOPOGRAPHY,MOVIE_SURFACE,MOVIE_VOLUME,MOVIE_COARSE, &
+ RECEIVERS_CAN_BE_BURIED,PRINT_SOURCE_TIME_FUNCTION, &
+ SAVE_MESH_FILES,ABSORBING_CONDITIONS,INCLUDE_CENTRAL_CUBE,SAVE_FORWARD, &
+ OUTPUT_SEISMOS_ASCII_TEXT,OUTPUT_SEISMOS_SAC_ALPHANUM,OUTPUT_SEISMOS_SAC_BINARY, &
+ ROTATE_SEISMOGRAMS_RT,HONOR_1D_SPHERICAL_MOHO,WRITE_SEISMOGRAMS_BY_MASTER,&
+ SAVE_ALL_SEISMOS_IN_ONE_FILE,USE_BINARY_FOR_LARGE_FILE
+
+ character(len=150) OUTPUT_FILES,LOCAL_PATH,MODEL
+ ! process/partition name
+ character(len=150) :: prname
+
+
+ !-----------------------------------------------------------------
+ ! mesh
+ !-----------------------------------------------------------------
+
+ ! this for all the regions
+ integer, dimension(MAX_NUM_REGIONS) :: NSPEC2DMAX_XMIN_XMAX,NSPEC2DMAX_YMIN_YMAX, &
+ NSPEC2D_BOTTOM,NSPEC2D_TOP, &
+ NGLOB1D_RADIAL, &
+ NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX
+
+ ! computed in read_compute_parameters
+ integer, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: ner,ratio_sampling_array
+ integer, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: doubling_index
+ double precision, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: r_bottom,r_top
+ logical, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: this_region_has_a_doubling
+ double precision, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: rmins,rmaxs
+
+ ! temporary arrays for elements on slices or edges
+ logical, dimension(:),allocatable :: is_on_a_slice_edge_crust_mantle, &
+ is_on_a_slice_edge_inner_core,is_on_a_slice_edge_outer_core
+
+ !-----------------------------------------------------------------
+ ! MPI partitions
+ !-----------------------------------------------------------------
+
+ ! for addressing of the slices
+ integer, dimension(NCHUNKS_VAL,0:NPROC_XI_VAL-1,0:NPROC_ETA_VAL-1) :: addressing
+ integer, dimension(0:NPROCTOT_VAL-1) :: ichunk_slice,iproc_xi_slice,iproc_eta_slice
+
+ ! proc numbers for MPI
+ integer :: myrank
+ integer :: ichunk,iproc_xi,iproc_eta
+
+ ! time loop timing
+ double precision :: time_start,tCPU
+
+ !-----------------------------------------------------------------
+ ! assembly
+ !-----------------------------------------------------------------
+
+ ! ---- arrays to assemble between chunks
+ ! communication pattern for faces between chunks
+ integer, dimension(NUMMSGS_FACES_VAL) :: iprocfrom_faces,iprocto_faces,imsg_type
+ ! communication pattern for corners between chunks
+ integer, dimension(NCORNERSCHUNKS_VAL) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
+
+ ! indirect addressing for each message for faces and corners of the chunks
+ ! a given slice can belong to at most one corner and at most two faces
+ integer :: NGLOB2DMAX_XY
+
+ ! this for non blocking MPI
+
+ ! buffers for send and receive between faces of the slices and the chunks
+ ! we use the same buffers to assemble scalars and vectors because vectors are
+ ! always three times bigger and therefore scalars can use the first part
+ ! of the vector buffer in memory even if it has an additional index here
+ integer :: npoin2D_max_all_CM_IC
+ real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: buffer_send_faces,buffer_received_faces, &
+ b_buffer_send_faces,b_buffer_received_faces
+
+ ! buffers for send and receive between corners of the chunks
+ real(kind=CUSTOM_REAL), dimension(NGLOB1D_RADIAL_CM) :: &
+ buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
+ b_buffer_send_chunkcorn_scalar,b_buffer_recv_chunkcorn_scalar
+
+ ! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB1D_RADIAL_CM + NGLOB1D_RADIAL_IC) :: &
+ buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector, &
+ b_buffer_send_chunkcorn_vector,b_buffer_recv_chunkcorn_vector
+
+ ! request ids for non-blocking MPI
+ integer :: request_send,request_receive
+ integer, dimension(NUMFACES_SHARED) :: request_send_array,request_receive_array
+ integer :: request_send_cc,request_receive_cc
+ integer, dimension(NPROC_XI_VAL+4) :: request_send_array_cc,request_receive_array_cc
+
+ integer :: b_request_send,b_request_receive
+ integer, dimension(NUMFACES_SHARED) :: b_request_send_array,b_request_receive_array
+ integer :: b_request_send_cc,b_request_receive_cc
+ integer, dimension(NPROC_XI_VAL+4) :: b_request_send_array_cc,b_request_receive_array_cc
+
+
+ logical, dimension(NGLOB_CRUST_MANTLE) :: mask_ibool
+
+ ! number of faces between chunks
+ integer :: NUMMSGS_FACES
+
+ ! number of corners between chunks
+ integer :: NCORNERSCHUNKS
+
+ ! number of message types
+ integer :: NUM_MSG_TYPES
+
+
+ ! collected MPI interfaces
+ ! MPI crust/mantle mesh
+ integer :: num_interfaces_crust_mantle
+ integer :: max_nibool_interfaces_crust_mantle
+ integer, dimension(:), allocatable :: my_neighbours_crust_mantle,nibool_interfaces_crust_mantle
+ integer, dimension(:,:), allocatable :: ibool_interfaces_crust_mantle
+
+ real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: buffer_send_vector_crust_mantle,buffer_recv_vector_crust_mantle
+ real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: b_buffer_send_vector_crust_mantle,b_buffer_recv_vector_crust_mantle
+
+ integer, dimension(:), allocatable :: request_send_vector_crust_mantle,request_recv_vector_crust_mantle
+ integer, dimension(:), allocatable :: b_request_send_vector_crust_mantle,b_request_recv_vector_crust_mantle
+
+ ! MPI inner core mesh
+ integer :: num_interfaces_inner_core
+ integer :: max_nibool_interfaces_inner_core
+ integer, dimension(:), allocatable :: my_neighbours_inner_core,nibool_interfaces_inner_core
+ integer, dimension(:,:), allocatable :: ibool_interfaces_inner_core
+
+ real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: buffer_send_vector_inner_core,buffer_recv_vector_inner_core
+ real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: b_buffer_send_vector_inner_core,b_buffer_recv_vector_inner_core
+
+ integer, dimension(:), allocatable :: request_send_vector_inner_core,request_recv_vector_inner_core
+ integer, dimension(:), allocatable :: b_request_send_vector_inner_core,b_request_recv_vector_inner_core
+
+ ! MPI outer core mesh
+ integer :: num_interfaces_outer_core
+ integer :: max_nibool_interfaces_outer_core
+ integer, dimension(:), allocatable :: my_neighbours_outer_core,nibool_interfaces_outer_core
+ integer, dimension(:,:), allocatable :: ibool_interfaces_outer_core
+
+ real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: buffer_send_scalar_outer_core,buffer_recv_scalar_outer_core
+ real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: b_buffer_send_scalar_outer_core,b_buffer_recv_scalar_outer_core
+
+ integer, dimension(:), allocatable :: request_send_scalar_outer_core,request_recv_scalar_outer_core
+ integer, dimension(:), allocatable :: b_request_send_scalar_outer_core,b_request_recv_scalar_outer_core
+
+ !-----------------------------------------------------------------
+ ! gpu
+ !-----------------------------------------------------------------
+
+ ! CUDA mesh pointer<->integer wrapper
+ integer(kind=8) :: Mesh_pointer
+ logical :: GPU_MODE
+
+end module specfem_par
+
+
+!=====================================================================
+
+module specfem_par_crustmantle
+
+! parameter module for elastic solver in crust/mantle region
+!
+! naming conventions used throughout this module:
+!   *_crust_mantle : array belongs to the crust/mantle region of the mesh
+!   b_*            : backward/reconstructed field for adjoint (kernel) simulations
+
+ use constants
+ implicit none
+
+ ! ----------------- crust, mantle and oceans ---------------------
+ ! mesh parameters
+ ! ibool: local (i,j,k,ispec) GLL point -> global point number
+ integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: ibool_crust_mantle
+
+ ! derivatives of the mapping from reference to physical coordinates (Jacobian terms)
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: &
+ xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle,&
+ etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
+ gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle
+
+ ! coordinates of the global mesh points
+ real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE) :: &
+ xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle
+
+ ! arrays for isotropic elements stored only where needed to save space
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ISO_MANTLE) :: &
+ rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle
+
+ ! arrays for anisotropic elements stored only where needed to save space
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_TISO_MANTLE) :: &
+ kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle
+
+ ! arrays for full anisotropy only when needed
+ ! (the 21 independent elastic constants c11..c66)
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ANISO_MANTLE) :: &
+ c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle, &
+ c14store_crust_mantle,c15store_crust_mantle,c16store_crust_mantle, &
+ c22store_crust_mantle,c23store_crust_mantle,c24store_crust_mantle, &
+ c25store_crust_mantle,c26store_crust_mantle,c33store_crust_mantle, &
+ c34store_crust_mantle,c35store_crust_mantle,c36store_crust_mantle, &
+ c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
+ c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle
+
+ ! local to global mapping
+ ! integer, dimension(NSPEC_CRUST_MANTLE) :: idoubling_crust_mantle
+ ! flags transversely isotropic elements
+ logical, dimension(NSPEC_CRUST_MANTLE) :: ispec_is_tiso_crust_mantle
+
+ ! mass matrix
+ real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE) :: rmass_crust_mantle
+
+ ! displacement, velocity, acceleration
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE) :: &
+ displ_crust_mantle,veloc_crust_mantle,accel_crust_mantle
+ ! ADJOINT
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE_ADJOINT) :: &
+ b_displ_crust_mantle,b_veloc_crust_mantle,b_accel_crust_mantle
+
+ ! memory variables and standard linear solids for attenuation
+ real(kind=CUSTOM_REAL), dimension(ATT1,ATT2,ATT3,ATT4) :: &
+ one_minus_sum_beta_crust_mantle, factor_scale_crust_mantle
+ real(kind=CUSTOM_REAL), dimension(N_SLS,ATT1,ATT2,ATT3,ATT4) :: &
+ factor_common_crust_mantle
+! real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUAT) :: &
+! R_memory_crust_mantle
+ ! memory variables, split from the former 5-component R_memory array above
+ real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUAT) :: &
+ R_xx_crust_mantle,R_yy_crust_mantle,R_xy_crust_mantle,R_xz_crust_mantle,R_yz_crust_mantle
+
+! real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_OR_ATT) :: &
+! epsilondev_crust_mantle
+ ! deviatoric strain, split from the former 5-component epsilondev array above
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_OR_ATT) :: &
+ epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
+ epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle
+
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STRAIN_ONLY) :: &
+ eps_trace_over_3_crust_mantle
+
+ ! ADJOINT
+! real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_AND_ATT) :: &
+! b_R_memory_crust_mantle
+ real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_AND_ATT) :: &
+ b_R_xx_crust_mantle,b_R_yy_crust_mantle,b_R_xy_crust_mantle,b_R_xz_crust_mantle,b_R_yz_crust_mantle
+
+! real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
+! b_epsilondev_crust_mantle
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
+ b_epsilondev_xx_crust_mantle,b_epsilondev_yy_crust_mantle,b_epsilondev_xy_crust_mantle, &
+ b_epsilondev_xz_crust_mantle,b_epsilondev_yz_crust_mantle
+
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
+ b_eps_trace_over_3_crust_mantle
+
+ ! for crust/oceans coupling
+ ! boundary element lists on the six faces of each mesh slice
+ integer, dimension(NSPEC2DMAX_XMIN_XMAX_CM) :: ibelm_xmin_crust_mantle,ibelm_xmax_crust_mantle
+ integer, dimension(NSPEC2DMAX_YMIN_YMAX_CM) :: ibelm_ymin_crust_mantle,ibelm_ymax_crust_mantle
+ integer, dimension(NSPEC2D_BOTTOM_CM) :: ibelm_bottom_crust_mantle
+ integer, dimension(NSPEC2D_TOP_CM) :: ibelm_top_crust_mantle
+
+ ! 2-D Jacobians and normals on the boundary faces
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_BOTTOM_CM) :: &
+ jacobian2D_bottom_crust_mantle
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_TOP_CM) :: &
+ jacobian2D_top_crust_mantle
+ real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ,NSPEC2DMAX_XMIN_XMAX_CM) :: &
+ jacobian2D_xmin_crust_mantle,jacobian2D_xmax_crust_mantle
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ,NSPEC2DMAX_YMIN_YMAX_CM) :: &
+ jacobian2D_ymin_crust_mantle,jacobian2D_ymax_crust_mantle
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLLY,NGLLZ,NSPEC2DMAX_XMIN_XMAX_CM) :: &
+ normal_xmin_crust_mantle,normal_xmax_crust_mantle
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2DMAX_YMIN_YMAX_CM) :: &
+ normal_ymin_crust_mantle,normal_ymax_crust_mantle
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_BOTTOM_CM) :: &
+ normal_bottom_crust_mantle
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_TOP_CM) :: &
+ normal_top_crust_mantle
+
+ ! Stacey
+ ! arrays for the Stacey absorbing boundary conditions
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STACEY) :: &
+ rho_vp_crust_mantle,rho_vs_crust_mantle
+ integer :: nspec2D_xmin_crust_mantle,nspec2D_xmax_crust_mantle, &
+ nspec2D_ymin_crust_mantle,nspec2D_ymax_crust_mantle
+ integer, dimension(2,NSPEC2DMAX_YMIN_YMAX_CM) :: nimin_crust_mantle,nimax_crust_mantle,nkmin_eta_crust_mantle
+ integer, dimension(2,NSPEC2DMAX_XMIN_XMAX_CM) :: njmin_crust_mantle,njmax_crust_mantle,nkmin_xi_crust_mantle
+
+ ! daniel: not sure why name ...5 and dimensions with one additional?
+ !real(kind=CUSTOM_REAL), dimension(:,:,:,:,:), allocatable :: absorb_xmin_crust_mantle5, &
+ ! absorb_xmax_crust_mantle5, absorb_ymin_crust_mantle5, absorb_ymax_crust_mantle5
+ ! runtime-allocated buffers holding absorbing-boundary contributions
+ real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: absorb_xmin_crust_mantle, &
+ absorb_xmax_crust_mantle, absorb_ymin_crust_mantle, absorb_ymax_crust_mantle
+
+ integer :: nabs_xmin_cm,nabs_xmax_cm,nabs_ymin_cm,nabs_ymax_cm
+
+ ! record lengths for the absorbing-boundary files
+ integer :: reclen_xmin_crust_mantle, reclen_xmax_crust_mantle, &
+ reclen_ymin_crust_mantle,reclen_ymax_crust_mantle
+
+ ! assembly
+ integer :: npoin2D_faces_crust_mantle(NUMFACES_SHARED)
+ integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle
+
+ ! indirect addressing for each corner of the chunks
+ integer, dimension(NGLOB1D_RADIAL_CM,NUMCORNERS_SHARED) :: iboolcorner_crust_mantle
+
+ ! 2-D addressing and buffers for summation between slices
+ integer, dimension(NGLOB2DMAX_XMIN_XMAX_CM) :: iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle
+ integer, dimension(NGLOB2DMAX_YMIN_YMAX_CM) :: iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle
+
+ integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED) :: iboolfaces_crust_mantle
+
+ ! kernels
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
+ rho_kl_crust_mantle,beta_kl_crust_mantle, alpha_kl_crust_mantle, Sigma_kl_crust_mantle
+ ! For anisotropic kernels (see compute_kernels.f90 for a definition of the array)
+ real(kind=CUSTOM_REAL), dimension(21,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
+ cijkl_kl_crust_mantle
+ ! approximate hessian
+ real(kind=CUSTOM_REAL), dimension(:,:,:,:),allocatable :: hess_kl_crust_mantle
+
+ ! Boundary Mesh and Kernels
+ integer :: k_top,k_bot,iregion_code
+ integer, dimension(NSPEC2D_MOHO) :: ibelm_moho_top,ibelm_moho_bot
+ integer, dimension(NSPEC2D_400) :: ibelm_400_top,ibelm_400_bot
+ integer, dimension(NSPEC2D_670) :: ibelm_670_top,ibelm_670_bot
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_MOHO) :: normal_moho
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_400) :: normal_400
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_670) :: normal_670
+ ! boundary kernels on the Moho, 400 km, 670 km and CMB discontinuities
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_MOHO) :: moho_kl, moho_kl_top, moho_kl_bot
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_400) :: d400_kl, d400_kl_top, d400_kl_bot
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_670) :: d670_kl, d670_kl_top, d670_kl_bot
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_CMB) :: cmb_kl, cmb_kl_top, cmb_kl_bot
+
+ ! NOISE_TOMOGRAPHY
+ real(kind=CUSTOM_REAL), dimension(:,:,:,:,:), allocatable :: noise_sourcearray
+ real(kind=CUSTOM_REAL), dimension(:), allocatable :: &
+ normal_x_noise,normal_y_noise,normal_z_noise, mask_noise
+ real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: noise_surface_movie
+ integer :: irec_master_noise
+
+ ! inner / outer elements crust/mantle region
+ ! (used to overlap communication with computation: outer elements first)
+ integer :: num_phase_ispec_crust_mantle
+ integer :: nspec_inner_crust_mantle,nspec_outer_crust_mantle
+ integer, dimension(:,:), allocatable :: phase_ispec_inner_crust_mantle
+
+end module specfem_par_crustmantle
+
+!=====================================================================
+
+module specfem_par_innercore
+
+! parameter module for elastic solver in inner core region
+!
+! naming conventions: *_inner_core arrays belong to the inner-core region;
+! b_* arrays hold the backward/reconstructed field for adjoint simulations
+
+ use constants
+ implicit none
+
+ ! ----------------- inner core ---------------------
+ ! mesh parameters
+ ! ibool: local (i,j,k,ispec) GLL point -> global point number
+ integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: ibool_inner_core
+
+ ! mapping derivatives (Jacobian terms) and isotropic material properties
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: &
+ xix_inner_core,xiy_inner_core,xiz_inner_core,&
+ etax_inner_core,etay_inner_core,etaz_inner_core, &
+ gammax_inner_core,gammay_inner_core,gammaz_inner_core, &
+ rhostore_inner_core, kappavstore_inner_core,muvstore_inner_core
+
+ ! coordinates of the global mesh points
+ real(kind=CUSTOM_REAL), dimension(NGLOB_INNER_CORE) :: &
+ xstore_inner_core,ystore_inner_core,zstore_inner_core
+
+ ! arrays for inner-core anisotropy only when needed
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ANISO_IC) :: &
+ c11store_inner_core,c33store_inner_core,c12store_inner_core, &
+ c13store_inner_core,c44store_inner_core
+
+ ! local to global mapping
+ integer, dimension(NSPEC_INNER_CORE) :: idoubling_inner_core
+ ! only needed for compute_boundary_kernel() routine
+ !logical, dimension(NSPEC_INNER_CORE) :: ispec_is_tiso_inner_core
+
+ ! mass matrix
+ real(kind=CUSTOM_REAL), dimension(NGLOB_INNER_CORE) :: rmass_inner_core
+
+ ! displacement, velocity, acceleration
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE) :: &
+ displ_inner_core,veloc_inner_core,accel_inner_core
+ ! ADJOINT
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE_ADJOINT) :: &
+ b_displ_inner_core,b_veloc_inner_core,b_accel_inner_core
+
+ ! memory variables and standard linear solids for attenuation
+ real(kind=CUSTOM_REAL), dimension(ATT1,ATT2,ATT3,ATT5) :: &
+ one_minus_sum_beta_inner_core, factor_scale_inner_core
+ real(kind=CUSTOM_REAL), dimension(N_SLS,ATT1,ATT2,ATT3,ATT5) :: &
+ factor_common_inner_core
+! real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ATTENUATION) :: &
+! R_memory_inner_core
+ ! memory variables, split from the former 5-component R_memory array above
+ real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ATTENUATION) :: &
+ R_xx_inner_core,R_yy_inner_core,R_xy_inner_core,R_xz_inner_core,R_yz_inner_core
+
+! real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_OR_ATT) :: &
+! epsilondev_inner_core
+ ! deviatoric strain, split from the former 5-component epsilondev array above
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_OR_ATT) :: &
+ epsilondev_xx_inner_core,epsilondev_yy_inner_core,epsilondev_xy_inner_core, &
+ epsilondev_xz_inner_core,epsilondev_yz_inner_core
+
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STRAIN_ONLY) :: &
+ eps_trace_over_3_inner_core
+ ! ADJOINT
+! real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_AND_ATT) :: &
+! b_R_memory_inner_core
+ real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_AND_ATT) :: &
+ b_R_xx_inner_core,b_R_yy_inner_core,b_R_xy_inner_core,b_R_xz_inner_core,b_R_yz_inner_core
+
+
+! real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ADJOINT) :: &
+! b_epsilondev_inner_core
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ADJOINT) :: &
+ b_epsilondev_xx_inner_core,b_epsilondev_yy_inner_core,b_epsilondev_xy_inner_core, &
+ b_epsilondev_xz_inner_core,b_epsilondev_yz_inner_core
+
+
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ADJOINT) :: &
+ b_eps_trace_over_3_inner_core
+
+ ! assembly
+ ! for matching with central cube in inner core
+ integer, dimension(:), allocatable :: sender_from_slices_to_cube
+ integer, dimension(:,:), allocatable :: ibool_central_cube
+ double precision, dimension(:,:), allocatable :: buffer_slices,b_buffer_slices,buffer_slices2
+ double precision, dimension(:,:,:), allocatable :: buffer_all_cube_from_slices,b_buffer_all_cube_from_slices
+ integer nb_msgs_theor_in_cube,non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices,receiver_cube_from_slices
+
+ ! number of boundary elements on each lateral face of the slice
+ integer :: nspec2D_xmin_inner_core,nspec2D_xmax_inner_core, &
+ nspec2D_ymin_inner_core,nspec2D_ymax_inner_core
+
+ integer, dimension(NSPEC2DMAX_XMIN_XMAX_IC) :: ibelm_xmin_inner_core,ibelm_xmax_inner_core
+ integer, dimension(NSPEC2DMAX_YMIN_YMAX_IC) :: ibelm_ymin_inner_core,ibelm_ymax_inner_core
+ integer, dimension(NSPEC2D_BOTTOM_IC) :: ibelm_bottom_inner_core
+ integer, dimension(NSPEC2D_TOP_IC) :: ibelm_top_inner_core
+
+ integer :: npoin2D_faces_inner_core(NUMFACES_SHARED)
+ integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_inner_core,npoin2D_eta_inner_core
+
+ ! indirect addressing for each corner of the chunks
+ integer, dimension(NGLOB1D_RADIAL_IC,NUMCORNERS_SHARED) :: iboolcorner_inner_core
+
+ ! 2-D addressing and buffers for summation between slices
+ integer, dimension(NGLOB2DMAX_XMIN_XMAX_IC) :: iboolleft_xi_inner_core,iboolright_xi_inner_core
+ integer, dimension(NGLOB2DMAX_YMIN_YMAX_IC) :: iboolleft_eta_inner_core,iboolright_eta_inner_core
+
+ integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED) :: iboolfaces_inner_core
+
+ ! adjoint kernels
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ADJOINT) :: &
+ rho_kl_inner_core,beta_kl_inner_core, alpha_kl_inner_core
+
+ ! Boundary Mesh and Kernels
+ ! boundary kernel on the inner-core boundary (ICB)
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_ICB) :: icb_kl, icb_kl_top, icb_kl_bot
+ logical :: fluid_solid_boundary
+
+ ! inner / outer elements inner core region
+ ! (used to overlap communication with computation: outer elements first)
+ integer :: num_phase_ispec_inner_core
+ integer :: nspec_inner_inner_core,nspec_outer_inner_core
+ integer, dimension(:,:), allocatable :: phase_ispec_inner_inner_core
+
+
+end module specfem_par_innercore
+
+!=====================================================================
+
+module specfem_par_outercore
+
+! parameter module for acoustic solver in outer core region
+!
+! naming conventions: *_outer_core arrays belong to the fluid outer-core region;
+! b_* arrays hold the backward/reconstructed field for adjoint simulations
+
+ use constants
+ implicit none
+
+ ! ----------------- outer core ---------------------
+ ! mesh parameters
+ ! ibool: local (i,j,k,ispec) GLL point -> global point number
+ integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE) :: ibool_outer_core
+
+ ! derivatives of the mapping from reference to physical coordinates (Jacobian terms)
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE) :: &
+ xix_outer_core,xiy_outer_core,xiz_outer_core,&
+ etax_outer_core,etay_outer_core,etaz_outer_core, &
+ gammax_outer_core,gammay_outer_core,gammaz_outer_core
+
+ ! coordinates of the global mesh points
+ real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE) :: &
+ xstore_outer_core,ystore_outer_core,zstore_outer_core
+
+ ! material properties of the fluid (no shear modulus in the acoustic region)
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE) :: &
+ rhostore_outer_core,kappavstore_outer_core
+
+ ! local to global mapping
+ !integer, dimension(NSPEC_OUTER_CORE) :: idoubling_outer_core
+ ! only needed for compute_boundary_kernel()
+ !logical, dimension(NSPEC_OUTER_CORE) :: ispec_is_tiso_outer_core
+
+ ! mass matrix
+ real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE) :: rmass_outer_core
+
+ ! velocity potential
+ ! (scalar unknowns of the acoustic solver, named like the elastic fields)
+ real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE) :: &
+ displ_outer_core,veloc_outer_core,accel_outer_core
+ ! ADJOINT
+ real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE_ADJOINT) :: &
+ b_displ_outer_core,b_veloc_outer_core,b_accel_outer_core
+
+
+ ! Stacey
+ ! arrays for the Stacey absorbing boundary conditions
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_STACEY) :: vp_outer_core
+ integer :: nspec2D_xmin_outer_core,nspec2D_xmax_outer_core, &
+ nspec2D_ymin_outer_core,nspec2D_ymax_outer_core
+ integer, dimension(2,NSPEC2DMAX_YMIN_YMAX_OC) :: nimin_outer_core,nimax_outer_core,nkmin_eta_outer_core
+ integer, dimension(2,NSPEC2DMAX_XMIN_XMAX_OC) :: njmin_outer_core,njmax_outer_core,nkmin_xi_outer_core
+
+ ! runtime-allocated buffers holding absorbing-boundary contributions
+ real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: absorb_xmin_outer_core, &
+ absorb_xmax_outer_core, absorb_ymin_outer_core, absorb_ymax_outer_core, &
+ absorb_zmin_outer_core
+ integer :: nabs_xmin_oc,nabs_xmax_oc,nabs_ymin_oc,nabs_ymax_oc,nabs_zmin_oc
+
+ ! record lengths for the absorbing-boundary files
+ integer :: reclen_xmin_outer_core, reclen_xmax_outer_core, &
+ reclen_ymin_outer_core, reclen_ymax_outer_core
+ integer :: reclen_zmin
+
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_OUTER_CORE) :: vector_accel_outer_core,&
+ vector_displ_outer_core, b_vector_displ_outer_core
+
+ ! arrays to couple with the fluid regions by pointwise matching
+ integer, dimension(NSPEC2DMAX_XMIN_XMAX_OC) :: ibelm_xmin_outer_core,ibelm_xmax_outer_core
+ integer, dimension(NSPEC2DMAX_YMIN_YMAX_OC) :: ibelm_ymin_outer_core,ibelm_ymax_outer_core
+ integer, dimension(NSPEC2D_BOTTOM_OC) :: ibelm_bottom_outer_core
+ integer, dimension(NSPEC2D_TOP_OC) :: ibelm_top_outer_core
+
+ ! boundary normals and 2-D Jacobians on the faces of the region
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLLY,NGLLZ,NSPEC2DMAX_XMIN_XMAX_OC) :: normal_xmin_outer_core,normal_xmax_outer_core
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLZ,NSPEC2DMAX_YMIN_YMAX_OC) :: normal_ymin_outer_core,normal_ymax_outer_core
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_BOTTOM_OC) :: normal_bottom_outer_core
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_TOP_OC) :: normal_top_outer_core
+
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_BOTTOM_OC) :: jacobian2D_bottom_outer_core
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_TOP_OC) :: jacobian2D_top_outer_core
+ real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ,NSPEC2DMAX_XMIN_XMAX_OC) :: jacobian2D_xmin_outer_core,jacobian2D_xmax_outer_core
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ,NSPEC2DMAX_YMIN_YMAX_OC) :: jacobian2D_ymin_outer_core,jacobian2D_ymax_outer_core
+
+ ! assembly
+ integer :: npoin2D_faces_outer_core(NUMFACES_SHARED)
+ integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_outer_core,npoin2D_eta_outer_core
+
+ ! indirect addressing for each corner of the chunks
+ integer, dimension(NGLOB1D_RADIAL_OC,NUMCORNERS_SHARED) :: iboolcorner_outer_core
+
+ ! 2-D addressing and buffers for summation between slices
+ integer, dimension(NGLOB2DMAX_XMIN_XMAX_OC) :: iboolleft_xi_outer_core,iboolright_xi_outer_core
+ integer, dimension(NGLOB2DMAX_YMIN_YMAX_OC) :: iboolleft_eta_outer_core,iboolright_eta_outer_core
+
+ integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED) :: iboolfaces_outer_core
+
+ ! adjoint kernels
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ADJOINT) :: &
+ rho_kl_outer_core,alpha_kl_outer_core
+
+ ! kernel runs
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ADJOINT) :: &
+ div_displ_outer_core
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ADJOINT) :: &
+ b_div_displ_outer_core
+
+ ! check for deviatoric kernel for outer core region
+ ! (disabled by default via the compile-time flag below)
+ real(kind=CUSTOM_REAL), dimension(:,:,:,:),allocatable :: beta_kl_outer_core
+ integer :: nspec_beta_kl_outer_core
+ logical,parameter:: deviatoric_outercore = .false.
+
+ ! inner / outer elements outer core region
+ ! (used to overlap communication with computation: outer elements first)
+ integer :: num_phase_ispec_outer_core
+ integer :: nspec_inner_outer_core,nspec_outer_outer_core
+ integer, dimension(:,:), allocatable :: phase_ispec_inner_outer_core
+
+
+end module specfem_par_outercore
+
+
+!=====================================================================
+
+module specfem_par_movie
+
+! parameter module for movies/shakemovies
+
+ use constants
+
+ implicit none
+
+ ! to save movie frames
+ ! store_val_* hold values at the surface movie points of this slice;
+ ! the *_all variants are the versions gathered over all slices
+ integer :: nmovie_points,NIT
+ real(kind=CUSTOM_REAL), dimension(:), allocatable :: &
+ store_val_x,store_val_y,store_val_z, &
+ store_val_ux,store_val_uy,store_val_uz
+ real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: &
+ store_val_x_all,store_val_y_all,store_val_z_all, &
+ store_val_ux_all,store_val_uy_all,store_val_uz_all
+
+ ! to save movie volume
+ integer :: npoints_3dmovie,nspecel_3dmovie
+ integer, dimension(NGLOB_CRUST_MANTLE) :: num_ibool_3dmovie
+ ! scaling factor applied to the output field (set to scale_displ or scale_veloc)
+ double precision :: scalingval
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_OR_ATT) :: &
+ muvstore_crust_mantle_3dmovie
+ real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: nu_3dmovie
+ ! selects which GLL points participate in the 3D movie output
+ logical, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STRAIN_ONLY) :: mask_3dmovie
+
+ ! I prefix: time-integrated strain fields used for movie volume types 2 and 3
+ real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_OR_ATT) :: &
+ Iepsilondev_crust_mantle
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STRAIN_ONLY) :: &
+ Ieps_trace_over_3_crust_mantle
+
+end module specfem_par_movie
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_movie_output.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_movie_output.f90 (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_movie_output.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -0,0 +1,142 @@
+!=====================================================================
+!
+! S p e c f e m 3 D G l o b e V e r s i o n 5 . 1
+! --------------------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA
+! and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+ subroutine write_movie_output()
+
+! writes movie snapshots for the current time step 'it':
+! - surface movie (MOVIE_SURFACE): every NTSTEP_BETWEEN_FRAMES steps
+! - volume movie (MOVIE_VOLUME): every NTSTEP_BETWEEN_FRAMES steps between
+!   MOVIE_START and MOVIE_STOP, with the output selected by MOVIE_VOLUME_TYPE:
+!     1       = strains
+!     2 or 3  = time integral of strain, or mu * time integral of strain
+!     4       = divergence and curl in the whole volume
+!     5       = displacement
+!     6       = velocity
+!   any other value aborts via exit_MPI()
+
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
+ use specfem_par_movie
+ implicit none
+
+ ! local parameters
+!daniel: debugging
+! character(len=256) :: filename
+! logical, parameter :: SNAPSHOT_INNER_CORE = .true.
+
+ ! save movie on surface
+ if( MOVIE_SURFACE ) then
+ if( mod(it,NTSTEP_BETWEEN_FRAMES) == 0) then
+ ! save velocity here to avoid static offset on displacement for movies
+ call write_movie_surface(myrank,nmovie_points,scale_veloc,veloc_crust_mantle, &
+ scale_displ,displ_crust_mantle, &
+ xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
+ store_val_x,store_val_y,store_val_z, &
+ store_val_x_all,store_val_y_all,store_val_z_all, &
+ store_val_ux,store_val_uy,store_val_uz, &
+ store_val_ux_all,store_val_uy_all,store_val_uz_all, &
+ ibelm_top_crust_mantle,ibool_crust_mantle, &
+ NSPEC2D_TOP(IREGION_CRUST_MANTLE), &
+ NIT,it,OUTPUT_FILES,MOVIE_VOLUME_TYPE)
+ endif
+ endif
+
+
+ ! save movie in full 3D mesh
+ if(MOVIE_VOLUME ) then
+ ! frames are aligned on MOVIE_START and restricted to [MOVIE_START,MOVIE_STOP]
+ if( mod(it-MOVIE_START,NTSTEP_BETWEEN_FRAMES) == 0 &
+ .and. it >= MOVIE_START .and. it <= MOVIE_STOP) then
+
+ if (MOVIE_VOLUME_TYPE == 1) then ! output strains
+
+ call write_movie_volume_strains(myrank,npoints_3dmovie, &
+ LOCAL_PATH,MOVIE_VOLUME_TYPE,MOVIE_COARSE, &
+ it,eps_trace_over_3_crust_mantle, &
+ epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
+ epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle, &
+ muvstore_crust_mantle_3dmovie, &
+ mask_3dmovie,nu_3dmovie)
+
+ else if (MOVIE_VOLUME_TYPE == 2 .or. MOVIE_VOLUME_TYPE == 3) then
+ ! output the Time Integral of Strain, or \mu*TIS
+ call write_movie_volume_strains(myrank,npoints_3dmovie, &
+ LOCAL_PATH,MOVIE_VOLUME_TYPE,MOVIE_COARSE, &
+ it,Ieps_trace_over_3_crust_mantle,Iepsilondev_crust_mantle, &
+ muvstore_crust_mantle_3dmovie, &
+ mask_3dmovie,nu_3dmovie)
+
+ else if (MOVIE_VOLUME_TYPE == 4) then ! output divergence and curl in whole volume
+
+ call write_movie_volume_divcurl(myrank,it,eps_trace_over_3_crust_mantle,&
+ div_displ_outer_core, &
+ accel_outer_core,kappavstore_outer_core,rhostore_outer_core,ibool_outer_core, &
+ eps_trace_over_3_inner_core, &
+ epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
+ epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle, &
+ epsilondev_xx_inner_core,epsilondev_yy_inner_core,epsilondev_xy_inner_core, &
+ epsilondev_xz_inner_core,epsilondev_yz_inner_core, &
+ LOCAL_PATH, &
+ displ_crust_mantle,displ_inner_core,displ_outer_core, &
+ veloc_crust_mantle,veloc_inner_core,veloc_outer_core, &
+ accel_crust_mantle,accel_inner_core, &
+ ibool_crust_mantle,ibool_inner_core)
+
+ else if (MOVIE_VOLUME_TYPE == 5) then !output displacement
+ scalingval = scale_displ
+ call write_movie_volume_vector(myrank,it,npoints_3dmovie, &
+ LOCAL_PATH,MOVIE_VOLUME_TYPE, &
+ MOVIE_COARSE,ibool_crust_mantle,displ_crust_mantle, &
+ scalingval,mask_3dmovie,nu_3dmovie)
+
+ else if (MOVIE_VOLUME_TYPE == 6) then !output velocity
+ scalingval = scale_veloc
+ call write_movie_volume_vector(myrank,it,npoints_3dmovie, &
+ LOCAL_PATH,MOVIE_VOLUME_TYPE, &
+ MOVIE_COARSE,ibool_crust_mantle,veloc_crust_mantle, &
+ scalingval,mask_3dmovie,nu_3dmovie)
+
+ else
+
+ call exit_MPI(myrank, 'MOVIE_VOLUME_TYPE has to be 1,2,3,4,5 or 6')
+
+ endif ! MOVIE_VOLUME_TYPE
+ endif
+ endif ! MOVIE_VOLUME
+
+!daniel: debugging
+! if( SNAPSHOT_INNER_CORE .and. mod(it-MOVIE_START,NTSTEP_BETWEEN_FRAMES) == 0 &
+! .and. it >= MOVIE_START .and. it <= MOVIE_STOP) then
+! ! VTK file output
+! ! displacement values
+! !write(prname,'(a,i6.6,a)') trim(LOCAL_PATH)//'/'//'proc',myrank,'_'
+! !write(filename,'(a,a,i6.6)') prname(1:len_trim(prname)),'reg_3_displ_',it
+! !call write_VTK_data_cr(idoubling_inner_core,NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
+! ! xstore_inner_core,ystore_inner_core,zstore_inner_core,ibool_inner_core, &
+! ! displ_inner_core,filename)
+!
+! write(prname,'(a)') 'OUTPUT_FILES/snapshot_all_'
+! write(filename,'(a,a,i6.6)') prname(1:len_trim(prname)),'reg_3_displ_',it
+! call write_VTK_data_cr_all(myrank,idoubling_inner_core, &
+! NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
+! xstore_inner_core,ystore_inner_core,zstore_inner_core,ibool_inner_core, &
+! displ_inner_core,filename)
+!
+! endif
+
+
+ end subroutine write_movie_output
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_movie_volume.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_movie_volume.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_movie_volume.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -256,8 +256,11 @@
! ---------------------------------------------
subroutine write_movie_volume_strains(myrank,npoints_3dmovie,LOCAL_PATH,MOVIE_VOLUME_TYPE,MOVIE_COARSE, &
- it,eps_trace_over_3_crust_mantle,epsilondev_crust_mantle,muvstore_crust_mantle_3dmovie, &
- mask_3dmovie,nu_3dmovie)
+ it,eps_trace_over_3_crust_mantle, &
+ epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
+ epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle, &
+ muvstore_crust_mantle_3dmovie, &
+ mask_3dmovie,nu_3dmovie)
implicit none
@@ -268,7 +271,12 @@
! input
integer :: myrank,npoints_3dmovie,MOVIE_VOLUME_TYPE,it
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: eps_trace_over_3_crust_mantle
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: epsilondev_crust_mantle
+
+! real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: epsilondev_crust_mantle
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: &
+ epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
+ epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle
+
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: muvstore_crust_mantle_3dmovie
logical, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: mask_3dmovie
logical :: MOVIE_COARSE
@@ -316,13 +324,13 @@
if(mask_3dmovie(i,j,k,ispec)) then
ipoints_3dmovie=ipoints_3dmovie+1
muv_3dmovie=muvstore_crust_mantle_3dmovie(i,j,k,ispec)
- eps_loc(1,1)=eps_trace_over_3_crust_mantle(i,j,k,ispec) + epsilondev_crust_mantle(1,i,j,k,ispec)
- eps_loc(2,2)=eps_trace_over_3_crust_mantle(i,j,k,ispec) + epsilondev_crust_mantle(2,i,j,k,ispec)
+ eps_loc(1,1)=eps_trace_over_3_crust_mantle(i,j,k,ispec) + epsilondev_xx_crust_mantle(i,j,k,ispec)
+ eps_loc(2,2)=eps_trace_over_3_crust_mantle(i,j,k,ispec) + epsilondev_yy_crust_mantle(i,j,k,ispec)
eps_loc(3,3)=eps_trace_over_3_crust_mantle(i,j,k,ispec)- &
- epsilondev_crust_mantle(1,i,j,k,ispec) - epsilondev_crust_mantle(2,i,j,k,ispec)
- eps_loc(1,2)=epsilondev_crust_mantle(3,i,j,k,ispec)
- eps_loc(1,3)=epsilondev_crust_mantle(4,i,j,k,ispec)
- eps_loc(2,3)=epsilondev_crust_mantle(5,i,j,k,ispec)
+ epsilondev_xx_crust_mantle(i,j,k,ispec) - epsilondev_yy_crust_mantle(i,j,k,ispec)
+ eps_loc(1,2)=epsilondev_xy_crust_mantle(i,j,k,ispec)
+ eps_loc(1,3)=epsilondev_xz_crust_mantle(i,j,k,ispec)
+ eps_loc(2,3)=epsilondev_yz_crust_mantle(i,j,k,ispec)
eps_loc(2,1)=eps_loc(1,2)
eps_loc(3,1)=eps_loc(1,3)
eps_loc(3,2)=eps_loc(2,3)
@@ -469,15 +477,18 @@
!--------------------
subroutine write_movie_volume_divcurl(myrank,it,eps_trace_over_3_crust_mantle,&
- div_displ_outer_core, &
- accel_outer_core,kappavstore_outer_core,rhostore_outer_core,ibool_outer_core, &
- eps_trace_over_3_inner_core, &
- epsilondev_crust_mantle,epsilondev_inner_core, &
- LOCAL_PATH, &
- displ_crust_mantle,displ_inner_core,displ_outer_core, &
- veloc_crust_mantle,veloc_inner_core,veloc_outer_core, &
- accel_crust_mantle,accel_inner_core, &
- ibool_crust_mantle,ibool_inner_core)
+ div_displ_outer_core, &
+ accel_outer_core,kappavstore_outer_core,rhostore_outer_core,ibool_outer_core, &
+ eps_trace_over_3_inner_core, &
+ epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
+ epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle, &
+ epsilondev_xx_inner_core,epsilondev_yy_inner_core,epsilondev_xy_inner_core, &
+ epsilondev_xz_inner_core,epsilondev_yz_inner_core, &
+ LOCAL_PATH, &
+ displ_crust_mantle,displ_inner_core,displ_outer_core, &
+ veloc_crust_mantle,veloc_inner_core,veloc_outer_core, &
+ accel_crust_mantle,accel_inner_core, &
+ ibool_crust_mantle,ibool_inner_core)
implicit none
include "constants.h"
@@ -493,9 +504,17 @@
integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE) :: ibool_outer_core
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STRAIN_ONLY) :: eps_trace_over_3_inner_core
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: epsilondev_crust_mantle
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_OR_ATT) :: epsilondev_inner_core
+! real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: epsilondev_crust_mantle
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: &
+ epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
+ epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle
+! real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_OR_ATT) :: epsilondev_inner_core
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_OR_ATT) :: &
+ epsilondev_xx_inner_core,epsilondev_yy_inner_core,epsilondev_xy_inner_core, &
+ epsilondev_xz_inner_core,epsilondev_yz_inner_core
+
+
character(len=150) LOCAL_PATH
real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE) :: displ_crust_mantle
@@ -587,12 +606,20 @@
! crust mantle
write(outputname,"('proc',i6.6,'_crust_mantle_epsdev_displ_it',i6.6,'.bin')") myrank,it
open(unit=27,file=trim(LOCAL_PATH)//trim(outputname),status='unknown',form='unformatted')
- write(27) epsilondev_crust_mantle
+ write(27) epsilondev_xx_crust_mantle
+ write(27) epsilondev_yy_crust_mantle
+ write(27) epsilondev_xy_crust_mantle
+ write(27) epsilondev_xz_crust_mantle
+ write(27) epsilondev_yz_crust_mantle
close(27)
! inner core
write(outputname,"('proc',i6.6,'inner_core_epsdev_displ_it',i6.6,'.bin')") myrank,it
open(unit=27,file=trim(LOCAL_PATH)//trim(outputname),status='unknown',form='unformatted')
- write(27) epsilondev_inner_core
+ write(27) epsilondev_xx_inner_core
+ write(27) epsilondev_yy_inner_core
+ write(27) epsilondev_xy_inner_core
+ write(27) epsilondev_xz_inner_core
+ write(27) epsilondev_yz_inner_core
close(27)
endif
@@ -609,11 +636,11 @@
do k = 1, NGLLZ
do j = 1, NGLLY
do i = 1, NGLLX
- tmp_data(i,j,k,ispec) = sqrt( epsilondev_crust_mantle(1,i,j,k,ispec)**2 &
- + epsilondev_crust_mantle(2,i,j,k,ispec)**2 &
- + epsilondev_crust_mantle(3,i,j,k,ispec)**2 &
- + epsilondev_crust_mantle(4,i,j,k,ispec)**2 &
- + epsilondev_crust_mantle(5,i,j,k,ispec)**2)
+ tmp_data(i,j,k,ispec) = sqrt( epsilondev_xx_crust_mantle(i,j,k,ispec)**2 &
+ + epsilondev_yy_crust_mantle(i,j,k,ispec)**2 &
+ + epsilondev_xy_crust_mantle(i,j,k,ispec)**2 &
+ + epsilondev_xz_crust_mantle(i,j,k,ispec)**2 &
+ + epsilondev_yz_crust_mantle(i,j,k,ispec)**2)
enddo
enddo
enddo
@@ -636,11 +663,11 @@
do k = 1, NGLLZ
do j = 1, NGLLY
do i = 1, NGLLX
- tmp_data(i,j,k,ispec) = sqrt( epsilondev_inner_core(1,i,j,k,ispec)**2 &
- + epsilondev_inner_core(2,i,j,k,ispec)**2 &
- + epsilondev_inner_core(3,i,j,k,ispec)**2 &
- + epsilondev_inner_core(4,i,j,k,ispec)**2 &
- + epsilondev_inner_core(5,i,j,k,ispec)**2)
+ tmp_data(i,j,k,ispec) = sqrt( epsilondev_xx_inner_core(i,j,k,ispec)**2 &
+ + epsilondev_yy_inner_core(i,j,k,ispec)**2 &
+ + epsilondev_xy_inner_core(i,j,k,ispec)**2 &
+ + epsilondev_xz_inner_core(i,j,k,ispec)**2 &
+ + epsilondev_yz_inner_core(i,j,k,ispec)**2)
enddo
enddo
enddo
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_seismograms.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_seismograms.f90 2012-02-13 23:52:29 UTC (rev 19621)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_seismograms.f90 2012-02-14 15:11:07 UTC (rev 19622)
@@ -25,8 +25,93 @@
!
!=====================================================================
+ subroutine write_seismograms()
+
+! computes the maximum of the norm of the displacement
+! in all the slices using an MPI reduction
+! and output timestamp file to check that simulation is running fine
+
+ use specfem_par
+ use specfem_par_crustmantle
+ implicit none
+
+ ! update position in seismograms
+ seismo_current = seismo_current + 1
+
+ ! compute & store the seismograms only if there is at least one receiver located in this slice
+ if (nrec_local > 0) then
+ if (SIMULATION_TYPE == 1) then
+ call compute_seismograms(nrec_local,nrec,displ_crust_mantle, &
+ nu,hxir_store,hetar_store,hgammar_store, &
+ scale_displ,ibool_crust_mantle, &
+ ispec_selected_rec,number_receiver_global, &
+ seismo_current,NTSTEP_BETWEEN_OUTPUT_SEISMOS, &
+ seismograms)
+
+ else if (SIMULATION_TYPE == 2) then
+ call compute_seismograms_adjoint(NSOURCES,nrec_local,displ_crust_mantle, &
+ eps_trace_over_3_crust_mantle, &
+ epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
+ epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle, &
+ nu_source,Mxx,Myy,Mzz,Mxy,Mxz,Myz, &
+ hxir_store,hetar_store,hgammar_store, &
+ hpxir_store,hpetar_store,hpgammar_store, &
+ tshift_cmt,hdur_gaussian,DT,t0,scale_displ, &
+ hprime_xx,hprime_yy,hprime_zz, &
+ xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+ etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
+ gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
+ moment_der,sloc_der,stshift_der,shdur_der, &
+ NTSTEP_BETWEEN_OUTPUT_SEISMOS,seismograms,deltat, &
+ ibool_crust_mantle,ispec_selected_source,number_receiver_global, &
+ NSTEP,it,nit_written)
+
+ else if (SIMULATION_TYPE == 3) then
+ call compute_seismograms_backward(nrec_local,nrec,b_displ_crust_mantle, &
+ nu,hxir_store,hetar_store,hgammar_store, &
+ scale_displ,ibool_crust_mantle, &
+ ispec_selected_rec,number_receiver_global, &
+ seismo_current,NTSTEP_BETWEEN_OUTPUT_SEISMOS, &
+ seismograms)
+
+ endif
+ endif ! nrec_local
+
+ ! write the current or final seismograms
+ if(seismo_current == NTSTEP_BETWEEN_OUTPUT_SEISMOS .or. it == it_end) then
+ if (SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) then
+ call write_seismograms_to_file(myrank,seismograms,number_receiver_global,station_name, &
+ network_name,stlat,stlon,stele,stbur, &
+ nrec,nrec_local,ANGULAR_WIDTH_XI_IN_DEGREES,NEX_XI,DT,t0,it_end, &
+ yr_SAC,jda_SAC,ho_SAC,mi_SAC,sec_SAC,t_cmt_SAC,t_shift_SAC, &
+ elat_SAC,elon_SAC,depth_SAC,event_name_SAC,cmt_lat_SAC,cmt_lon_SAC,&
+ cmt_depth_SAC,cmt_hdur_SAC,NPROCTOT_VAL, &
+ OUTPUT_SEISMOS_ASCII_TEXT,OUTPUT_SEISMOS_SAC_ALPHANUM, &
+ OUTPUT_SEISMOS_SAC_BINARY,ROTATE_SEISMOGRAMS_RT,NTSTEP_BETWEEN_OUTPUT_SEISMOS, &
+ seismo_offset,seismo_current,WRITE_SEISMOGRAMS_BY_MASTER, &
+ SAVE_ALL_SEISMOS_IN_ONE_FILE,USE_BINARY_FOR_LARGE_FILE,MODEL)
+ if(myrank==0) then
+ write(IMAIN,*)
+ write(IMAIN,*) ' Total number of time steps written: ', it-it_begin+1
+ write(IMAIN,*)
+ endif
+ else
+ if( nrec_local > 0 ) &
+ call write_adj_seismograms(seismograms,number_receiver_global, &
+ nrec_local,it,nit_written,DT, &
+ NSTEP,NTSTEP_BETWEEN_OUTPUT_SEISMOS,t0,LOCAL_PATH)
+ nit_written = it
+ endif
+ seismo_offset = seismo_offset + seismo_current
+ seismo_current = 0
+ endif
+
+ end subroutine write_seismograms
+
+!=====================================================================
+
! write seismograms to files
- subroutine write_seismograms(myrank,seismograms,number_receiver_global,station_name, &
+ subroutine write_seismograms_to_file(myrank,seismograms,number_receiver_global,station_name, &
network_name,stlat,stlon,stele,stbur, &
nrec,nrec_local,ANGULAR_WIDTH_XI_IN_DEGREES,NEX_XI,DT,hdur,it_end, &
yr,jda,ho,mi,sec,tshift_cmt,t_shift, &
@@ -275,7 +360,7 @@
deallocate(one_seismogram)
- end subroutine write_seismograms
+ end subroutine write_seismograms_to_file
!=====================================================================
More information about the CIG-COMMITS
mailing list