[cig-commits] r19680 - in seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER: . examples/homogeneous_halfspace m4 src/cuda src/generate_databases src/shared src/specfem3D utils
danielpeter at geodynamics.org
danielpeter at geodynamics.org
Fri Feb 24 17:37:34 PST 2012
Author: danielpeter
Date: 2012-02-24 17:37:33 -0800 (Fri, 24 Feb 2012)
New Revision: 19680
Added:
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/m4/cit_openmp.m4
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.F90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/create_specfem3D_gpu_cuda_method_stubs.pl
Removed:
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90
Modified:
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/configure
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/configure.ac
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/homogeneous_halfspace/process.sh
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_coupling_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_acoustic_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_elastic_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/mesh_constants_cuda.h
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/noise_tomography_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/transfer_fields_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_perm_color.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/check_mesh_resolution.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/detect_surface.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/serial.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/Makefile.in
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_elastic.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_acoustic.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic_Dev_openmp.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_kernels.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/initialize_simulation.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/iterate_time.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/noise_tomography.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/read_mesh_databases.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D_par.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_seismograms.f90
Log:
updates configuration files to use ./configure --with-openmp for OpenMP version; renames file endings to compute_forces_elastic.F90 and prepare_timerun.F90; updates preparations to use openmp routine also without mesh coloring
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/configure
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/configure 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/configure 2012-02-25 01:37:33 UTC (rev 19680)
@@ -639,11 +639,13 @@
ac_subst_vars='LTLIBOBJS
LIBOBJS
+NVCC
PYTHON_EGG_LDFLAGS
PYTHON_EGG_CPPFLAGS
PYTHON_EGG_CFLAGS
PYTHONPATH
LOCAL_PATH_IS_ALSO_GLOBAL
+OPENMP_LIB
MPI_INC
CUDA_INC
CUDA_LIB
@@ -711,6 +713,8 @@
PYTHON_PREFIX
PYTHON_VERSION
PYTHON
+COND_OPENMP_FALSE
+COND_OPENMP_TRUE
COND_CUDA_FALSE
COND_CUDA_TRUE
CUSTOM_MPI_TYPE
@@ -763,6 +767,7 @@
with_mpi
enable_double_precision
with_cuda
+with_openmp
with_scotch_dir
with_scotch_includedir
with_scotch_libdir
@@ -792,6 +797,7 @@
CUDA_LIB
CUDA_INC
MPI_INC
+OPENMP_LIB
LOCAL_PATH_IS_ALSO_GLOBAL
PYTHON
PYTHONPATH'
@@ -1429,6 +1435,7 @@
--with-pyre build Pyrized version [default=no]
--with-mpi build parallel version [default=yes]
--with-cuda build cuda GPU enabled version [default=no]
+ --with-openmp build OpenMP enabled version [default=no]
--with-scotch-dir define the root path to Scotch (e.g. /opt/scotch/)
--with-scotch-includedir
define the path to the Scotch headers (e.g.
@@ -1470,6 +1477,7 @@
CUDA_INC Location of CUDA include files
MPI_INC Location of MPI include mpi.h, which is needed by nvcc when
compiling cuda files
+ OPENMP_LIB Location of extra OpenMP libraries
LOCAL_PATH_IS_ALSO_GLOBAL
files on a local path on each node are also seen as global with
same path [default=true]
@@ -2001,6 +2009,23 @@
+# Check whether --with-openmp was given.
+if test "${with_openmp+set}" = set; then
+ withval=$with_openmp; want_openmp="$withval"
+else
+ want_openmp=no
+fi
+
+ if test "$want_openmp" = yes; then
+ COND_OPENMP_TRUE=
+ COND_OPENMP_FALSE='#'
+else
+ COND_OPENMP_TRUE='#'
+ COND_OPENMP_FALSE=
+fi
+
+
+
# Checks for programs.
if test "$want_pyre" = yes; then
@@ -7423,6 +7448,7 @@
fi
+
if test x"$LOCAL_PATH_IS_ALSO_GLOBAL" = x; then
LOCAL_PATH_IS_ALSO_GLOBAL=true
fi
@@ -7848,10 +7874,163 @@
fi
+if test "$want_cuda" = yes; then
+ # Extract the first word of "nvcc", so it can be a program name with args.
+set dummy nvcc; ac_word=$2
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_path_NVCC+set}" = set; then
+ $as_echo_n "(cached) " >&6
+else
+ case $NVCC in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_NVCC="$NVCC" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_NVCC="$as_dir/$ac_word$ac_exec_ext"
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ ;;
+esac
+fi
+NVCC=$ac_cv_path_NVCC
+if test -n "$NVCC"; then
+ { $as_echo "$as_me:$LINENO: result: $NVCC" >&5
+$as_echo "$NVCC" >&6; }
+else
+ { $as_echo "$as_me:$LINENO: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+ if test -z "$NVCC" ; then
+ { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+{ { $as_echo "$as_me:$LINENO: error: cannot find 'nvcc' program.
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: cannot find 'nvcc' program.
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }; }
+ NVCC=`echo "Error: nvcc is not installed." ; false`
+ fi
+
+fi
+
+if test "$want_openmp" = yes; then
+
+ac_ext=${ac_fc_srcext-f}
+ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
+ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_fc_compiler_gnu
+
+cit_fc_save_fc=$FC
+cit_fc_save_fcflags=$FCFLAGS
+FC=$FC
+FCFLAGS="$FCFLAGS $FLAGS_NO_CHECK"
+
+{ $as_echo "$as_me:$LINENO: checking whether OpenMP directives work" >&5
+$as_echo_n "checking whether OpenMP directives work... " >&6; }
+
+#AC_COMPILE_IFELSE(_CIT_FC_TRIVIAL_OPENMP_PROGRAM, [
+# AC_MSG_RESULT(yes)
+#], [
+# AC_MSG_RESULT(no)
+# AC_MSG_FAILURE([cannot compile a trivial OpenMP program using $FC])
+#])
+
+cat >conftest.$ac_ext <<_ACEOF
+
+ program main
+
+ implicit none
+ integer OMP_get_thread_num
+ integer OMP_GET_MAX_THREADS
+ integer NUM_THREADS
+ integer thread_id
+
+ NUM_THREADS = OMP_GET_MAX_THREADS()
+ !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(thread_id)
+ thread_id = OMP_get_thread_num()+1
+ !$OMP END PARALLEL
+
+ end
+
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_fc_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext && {
+ test "$cross_compiling" = yes ||
+ $as_test_x conftest$ac_exeext
+ }; then
+
+ { $as_echo "$as_me:$LINENO: result: yes" >&5
+$as_echo "yes" >&6; }
+
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+ { $as_echo "$as_me:$LINENO: result: no" >&5
+$as_echo "no" >&6; }
+ { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+{ { $as_echo "$as_me:$LINENO: error: cannot link a trivial OpenMP program using $FC with flags: $FLAGS_NO_CHECK
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: cannot link a trivial OpenMP program using $FC with flags: $FLAGS_NO_CHECK
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }; }
+
+fi
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
+
+FC=$cit_fc_save_fc
+FCFLAGS=$cit_fc_save_fcflags
+
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+fi
+
+
# Checks for library functions.
-
ac_config_files="$ac_config_files Makefile in_data_files/Par_file in_data_files/CMTSOLUTION in_data_files/STATIONS src/meshfem3D/Makefile src/meshfem3D/constants.h src/meshfem3D/precision.h src/decompose_mesh_SCOTCH/Makefile src/specfem3D/Makefile src/generate_databases/Makefile src/shared/constants.h src/shared/precision.h src/decompose_mesh_SCOTCH/scotch_5.1.11/src/Makefile.inc src/check_mesh_quality_CUBIT_Abaqus/Makefile"
cat >confcache <<\_ACEOF
@@ -7972,6 +8151,13 @@
Usually this means the macro was only invoked conditionally." >&2;}
{ (exit 1); exit 1; }; }
fi
+if test -z "${COND_OPENMP_TRUE}" && test -z "${COND_OPENMP_FALSE}"; then
+ { { $as_echo "$as_me:$LINENO: error: conditional \"COND_OPENMP\" was never defined.
+Usually this means the macro was only invoked conditionally." >&5
+$as_echo "$as_me: error: conditional \"COND_OPENMP\" was never defined.
+Usually this means the macro was only invoked conditionally." >&2;}
+ { (exit 1); exit 1; }; }
+fi
: ${CONFIG_STATUS=./config.status}
ac_write_fail=0
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/configure.ac
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/configure.ac 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/configure.ac 2012-02-25 01:37:33 UTC (rev 19680)
@@ -54,7 +54,14 @@
[want_cuda=no])
AM_CONDITIONAL([COND_CUDA], [test "$want_cuda" = yes])
+AC_ARG_WITH([openmp],
+ [AC_HELP_STRING([--with-openmp],
+ [build OpenMP enabled version @<:@default=no@:>@])],
+ [want_openmp="$withval"],
+ [want_openmp=no])
+AM_CONDITIONAL([COND_OPENMP], [test "$want_openmp" = yes])
+
# Checks for programs.
if test "$want_pyre" = yes; then
@@ -357,6 +364,7 @@
if test x"$MPICC" = x; then
MPICC=mpicc
fi
+AC_ARG_VAR(OPENMP_LIB,[Location of extra OpenMP libraries])
AC_ARG_VAR(LOCAL_PATH_IS_ALSO_GLOBAL, [files on a local path on each node are also seen as global with same path @<:@default=true@:>@])
if test x"$LOCAL_PATH_IS_ALSO_GLOBAL" = x; then
@@ -427,10 +435,17 @@
CIT_FC_MAIN
fi
+if test "$want_cuda" = yes; then
+ CIT_CUDA_COMPILER
+fi
+if test "$want_openmp" = yes; then
+ CIT_FC_OPENMP_MODULE([$FC],[$FLAGS_NO_CHECK])
+fi
+
+
# Checks for library functions.
-
AC_CONFIG_FILES([Makefile in_data_files/Par_file in_data_files/CMTSOLUTION in_data_files/STATIONS src/meshfem3D/Makefile src/meshfem3D/constants.h src/meshfem3D/precision.h src/decompose_mesh_SCOTCH/Makefile src/specfem3D/Makefile src/generate_databases/Makefile src/shared/constants.h src/shared/precision.h src/decompose_mesh_SCOTCH/scotch_5.1.11/src/Makefile.inc src/check_mesh_quality_CUBIT_Abaqus/Makefile])
AC_OUTPUT
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/homogeneous_halfspace/process.sh
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/homogeneous_halfspace/process.sh 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/homogeneous_halfspace/process.sh 2012-02-25 01:37:33 UTC (rev 19680)
@@ -40,9 +40,8 @@
# links executables
cd bin/
-ln -s ../../../bin/xdecompose_mesh_SCOTCH
-ln -s ../../../bin/xgenerate_databases
-ln -s ../../../bin/xspecfem3D
+rm -f bin/x*
+cp ../../../bin/x* ./
cd ../
# decomposes mesh
Added: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/m4/cit_openmp.m4
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/m4/cit_openmp.m4 (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/m4/cit_openmp.m4 2012-02-25 01:37:33 UTC (rev 19680)
@@ -0,0 +1,56 @@
+# -*- Autoconf -*-
+
+
+## ---------------------------- ##
+## Autoconf macros for Fortran. ##
+## ---------------------------- ##
+
+
+# CIT_FC_OPENMP_MODULE(FC, FCFLAGS)
+# -----------------------------------------------------
+AC_DEFUN([CIT_FC_OPENMP_MODULE], [
+AC_LANG_PUSH(Fortran)
+cit_fc_save_fc=$FC
+cit_fc_save_fcflags=$FCFLAGS
+FC=$1
+FCFLAGS="$FCFLAGS $2"
+
+AC_MSG_CHECKING([whether OpenMP directives work])
+
+#AC_COMPILE_IFELSE(_CIT_FC_TRIVIAL_OPENMP_PROGRAM, [
+# AC_MSG_RESULT(yes)
+#], [
+# AC_MSG_RESULT(no)
+# AC_MSG_FAILURE([cannot compile a trivial OpenMP program using $1])
+#])
+
+AC_LINK_IFELSE(_CIT_FC_TRIVIAL_OPENMP_PROGRAM, [
+ AC_MSG_RESULT(yes)
+], [
+ AC_MSG_RESULT(no)
+ AC_MSG_FAILURE([cannot link a trivial OpenMP program using $1 with flags: $2])
+])
+
+FC=$cit_fc_save_fc
+FCFLAGS=$cit_fc_save_fcflags
+
+
+AC_LANG_POP(Fortran)
+])dnl CIT_FC_OPENMP_MODULE
+
+AC_DEFUN([_CIT_FC_TRIVIAL_OPENMP_PROGRAM], [
+AC_LANG_PROGRAM([], [[
+ implicit none
+ integer OMP_get_thread_num
+ integer OMP_GET_MAX_THREADS
+ integer NUM_THREADS
+ integer thread_id
+
+ NUM_THREADS = OMP_GET_MAX_THREADS()
+ !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(thread_id)
+ thread_id = OMP_get_thread_num()+1
+ !$OMP END PARALLEL
+]])
+])dnl _CIT_FC_TRIVIAL_OPENMP_PROGRAM
+
+dnl end of file
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_coupling_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_coupling_cuda.cu 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_coupling_cuda.cu 2012-02-25 01:37:33 UTC (rev 19680)
@@ -115,11 +115,10 @@
extern "C"
void FC_FUNC_(compute_coupling_ac_el_cuda,
- COMPUTE_COUPLING_AC_EL_CUDA)(
- long* Mesh_pointer_f,
- int* phase_is_innerf,
- int* num_coupling_ac_el_facesf,
- int* SIMULATION_TYPEf) {
+ COMPUTE_COUPLING_AC_EL_CUDA)(long* Mesh_pointer_f,
+ int* phase_is_innerf,
+ int* num_coupling_ac_el_facesf,
+ int* SIMULATION_TYPEf) {
TRACE("compute_coupling_ac_el_cuda");
//double start_time = get_time();
@@ -238,13 +237,13 @@
// note: uses potential chi such that displacement s = grad(chi),
// pressure becomes: p = - kappa ( div( s ) ) = rho ( - dot_dot_chi + g * s )
// g only acting in negative z-direction
-
+
// daniel: TODO - check gravity and coupling would be displ * nz correct?
pressure = rhol*( - potential_dot_dot_acoustic[iglob]
+ minus_g[iglob] * displ[iglob*3+2] );
- //daniel: TODO - check gravity and coupling
- //pressure = - potential_dot_dot_acoustic[iglob] ;
+ //daniel: TODO - check gravity and coupling
+ //pressure = - potential_dot_dot_acoustic[iglob] ;
//if( iface == 128 && igll == 5 ){
// printf("coupling acoustic: %f %f \n",potential_dot_dot_acoustic[iglob],
// minus_g[iglob] * displ[iglob*3+2]);
@@ -252,7 +251,7 @@
}else{
// no gravity: uses potential chi such that displacement s = 1/rho grad(chi)
- // pressure p = - kappa ( div( s ) ) then becomes: p = - dot_dot_chi
+ // pressure p = - kappa ( div( s ) ) then becomes: p = - dot_dot_chi
// ( multiplied with factor 1/kappa due to setup of equation of motion )
pressure = - potential_dot_dot_acoustic[iglob];
}
@@ -278,11 +277,10 @@
extern "C"
void FC_FUNC_(compute_coupling_el_ac_cuda,
- COMPUTE_COUPLING_EL_AC_CUDA)(
- long* Mesh_pointer_f,
- int* phase_is_innerf,
- int* num_coupling_ac_el_facesf,
- int* SIMULATION_TYPEf) {
+ COMPUTE_COUPLING_EL_AC_CUDA)(long* Mesh_pointer_f,
+ int* phase_is_innerf,
+ int* num_coupling_ac_el_facesf,
+ int* SIMULATION_TYPEf) {
TRACE("compute_coupling_el_ac_cuda");
//double start_time = get_time();
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_acoustic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_acoustic_cuda.cu 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_acoustic_cuda.cu 2012-02-25 01:37:33 UTC (rev 19680)
@@ -412,25 +412,25 @@
// assumes that g only acts in (negative) z-direction
kappa_invl = 1.f / d_kappastore[working_element*NGLL3 + tx];
iglob = d_ibool[working_element*NGLL3 + tx]-1;
-
+
// daniel: TODO - check gravity
// if( kappa_invl <= 0.0f ){
// printf("kappa error: %f %f\n",kappa_invl,d_kappastore[working_element*NGLL3 + tx]);
// printf("kappa error: thread %d %d \n",tx,working_element);
-// asm("trap;");
+// asm("trap;");
// }
// if( iglob <= 0 ){
// printf("iglob error: %d %d %d \n",iglob,tx,working_element);
-// asm("trap;");
-// }
-
+// asm("trap;");
+// }
+
gravity_term = minus_g[iglob] * kappa_invl * jacobianl * wgll_cube[tx] * dpotentialdzl;
// daniel: TODO - check gravity
//gravity_term = 0.f;
//if( iglob == 5 ){
// printf("iglob infos: %f %f %f %f %f \n",minus_g[iglob],kappa_invl,jacobianl,wgll_cube[tx],dpotentialdzl);
- //}
+ //}
}
// density (reciproc)
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_elastic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_elastic_cuda.cu 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_elastic_cuda.cu 2012-02-25 01:37:33 UTC (rev 19680)
@@ -488,8 +488,8 @@
// debug
//*rho_s_H1 = 0.f;
//*rho_s_H2 = 0.f;
- //*rho_s_H3 = 0.f ;
-
+ //*rho_s_H3 = 0.f ;
+
}
/* ----------------------------------------------------------------------------------------------- */
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/mesh_constants_cuda.h
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/mesh_constants_cuda.h 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/mesh_constants_cuda.h 2012-02-25 01:37:33 UTC (rev 19680)
@@ -124,8 +124,8 @@
// cuda kernel block size for updating displacements/potential (newmark time scheme)
// current hardware: 128 is slightly faster than 256 ( ~ 4%)
-#define BLOCKSIZE_KERNEL1 128
-#define BLOCKSIZE_KERNEL3 128
+#define BLOCKSIZE_KERNEL1 128
+#define BLOCKSIZE_KERNEL3 128
#define BLOCKSIZE_TRANSFER 256
/* ----------------------------------------------------------------------------------------------- */
@@ -240,7 +240,6 @@
// receivers
int* d_number_receiver_global;
int* d_ispec_selected_rec;
- int* d_islice_selected_rec;
int nrec_local;
realw* d_station_seismo_field;
realw* h_station_seismo_field;
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/noise_tomography_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/noise_tomography_cuda.cu 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/noise_tomography_cuda.cu 2012-02-25 01:37:33 UTC (rev 19680)
@@ -64,7 +64,7 @@
MPI_Comm_rank(MPI_COMM_WORLD,&procid);
#else
procid = 0;
-#endif
+#endif
printf("%d: sends msg_id %d\n",procid,*id);
}
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu 2012-02-25 01:37:33 UTC (rev 19680)
@@ -81,8 +81,10 @@
gethostname(hostname, sizeof(hostname));
printf("PID %d on %s:%d ready for attach\n", getpid(), hostname,myrank);
FILE *file = fopen("/scratch/eiger/rietmann/attach_gdb.txt","w+");
- fprintf(file,"PID %d on %s:%d ready for attach\n", getpid(), hostname,myrank);
- fclose(file);
+ if (file != NULL){
+ fprintf(file,"PID %d on %s:%d ready for attach\n", getpid(), hostname,myrank);
+ fclose(file);
+ }
fflush(stdout);
while (0 == i)
sleep(5);
@@ -128,7 +130,7 @@
fflush(stdout);
#ifdef WITH_MPI
MPI_Abort(MPI_COMM_WORLD,1);
-#endif
+#endif
exit(EXIT_FAILURE);
}
return;
@@ -166,9 +168,11 @@
sprintf(filename,"../in_out_files/OUTPUT_FILES/gpu_device_mem_usage_proc_%06d.txt",myrank);
fp = fopen(filename,"a+");
- fprintf(fp,"%d: @%s GPU memory usage: used = %f MB, free = %f MB, total = %f MB\n", myrank, info_str,
- used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
- fclose(fp);
+ if (fp != NULL){
+ fprintf(fp,"%d: @%s GPU memory usage: used = %f MB, free = %f MB, total = %f MB\n", myrank, info_str,
+ used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
+ fclose(fp);
+ }
}
/* ----------------------------------------------------------------------------------------------- */
@@ -191,7 +195,7 @@
// show memory usage of GPU
int myrank;
-#ifdef WITH_MPI
+#ifdef WITH_MPI
MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
#else
myrank = 0;
@@ -204,15 +208,17 @@
used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
}
+*/
- extern "C"
- void FC_FUNC_(show_free_device_memory,
- SHOW_FREE_DEVICE_MEMORY)() {
+/*
+extern "C"
+void FC_FUNC_(show_free_device_memory,
+ SHOW_FREE_DEVICE_MEMORY)() {
TRACE("show_free_device_memory");
-
+
show_free_memory("from fortran");
- }
- */
+}
+*/
/* ----------------------------------------------------------------------------------------------- */
@@ -474,23 +480,23 @@
int device_count = 0;
cudaGetDeviceCount(&device_count);
exit_on_cuda_error("CUDA runtime cudaGetDeviceCount: check if driver and runtime libraries work together\nexiting...\n");
-
+
// returns device count to fortran
if (device_count == 0) exit_on_error("CUDA runtime error: there is no device supporting CUDA\n");
*ncuda_devices = device_count;
- // Sets the active device
+ // Sets the active device
if(device_count > 1) {
// generalized for more GPUs per node
// note: without previous context release, cudaSetDevice will complain with the cuda error
// "setting the device when a process is active is not allowed"
- // releases previous contexts
+ // releases previous contexts
cudaThreadExit();
-
+
//printf("rank %d: cuda device count = %d sets device = %d \n",myrank,device_count,myrank % device_count);
//MPI_Barrier(MPI_COMM_WORLD);
-
+
// sets active device
cudaSetDevice( myrank % device_count );
exit_on_cuda_error("cudaSetDevice");
@@ -515,47 +521,48 @@
FILE* fp;
sprintf(filename,"../in_out_files/OUTPUT_FILES/gpu_device_info_proc_%06d.txt",myrank);
fp = fopen(filename,"a+");
+ if (fp != NULL){
+ // display device properties
+ fprintf(fp,"Device Name = %s\n",deviceProp.name);
+ fprintf(fp,"multiProcessorCount: %d\n",deviceProp.multiProcessorCount);
+ fprintf(fp,"totalGlobalMem (in MB): %f\n",(unsigned long) deviceProp.totalGlobalMem / (1024.f * 1024.f));
+ fprintf(fp,"totalGlobalMem (in GB): %f\n",(unsigned long) deviceProp.totalGlobalMem / (1024.f * 1024.f * 1024.f));
+ fprintf(fp,"sharedMemPerBlock (in bytes): %lu\n",(unsigned long) deviceProp.sharedMemPerBlock);
+ fprintf(fp,"Maximum number of threads per block: %d\n",deviceProp.maxThreadsPerBlock);
+ fprintf(fp,"Maximum size of each dimension of a block: %d x %d x %d\n",
+ deviceProp.maxThreadsDim[0],deviceProp.maxThreadsDim[1],deviceProp.maxThreadsDim[2]);
+ fprintf(fp,"Maximum sizes of each dimension of a grid: %d x %d x %d\n",
+ deviceProp.maxGridSize[0],deviceProp.maxGridSize[1],deviceProp.maxGridSize[2]);
+ fprintf(fp,"Compute capability of the device = %d.%d\n", deviceProp.major, deviceProp.minor);
+ if(deviceProp.canMapHostMemory){
+ fprintf(fp,"canMapHostMemory: TRUE\n");
+ }else{
+ fprintf(fp,"canMapHostMemory: FALSE\n");
+ }
+ if(deviceProp.deviceOverlap){
+ fprintf(fp,"deviceOverlap: TRUE\n");
+ }else{
+ fprintf(fp,"deviceOverlap: FALSE\n");
+ }
- // display device properties
- fprintf(fp,"Device Name = %s\n",deviceProp.name);
- fprintf(fp,"multiProcessorCount: %d\n",deviceProp.multiProcessorCount);
- fprintf(fp,"totalGlobalMem (in MB): %f\n",(unsigned long) deviceProp.totalGlobalMem / (1024.f * 1024.f));
- fprintf(fp,"totalGlobalMem (in GB): %f\n",(unsigned long) deviceProp.totalGlobalMem / (1024.f * 1024.f * 1024.f));
- fprintf(fp,"sharedMemPerBlock (in bytes): %lu\n",(unsigned long) deviceProp.sharedMemPerBlock);
- fprintf(fp,"Maximum number of threads per block: %d\n",deviceProp.maxThreadsPerBlock);
- fprintf(fp,"Maximum size of each dimension of a block: %d x %d x %d\n",
- deviceProp.maxThreadsDim[0],deviceProp.maxThreadsDim[1],deviceProp.maxThreadsDim[2]);
- fprintf(fp,"Maximum sizes of each dimension of a grid: %d x %d x %d\n",
- deviceProp.maxGridSize[0],deviceProp.maxGridSize[1],deviceProp.maxGridSize[2]);
- fprintf(fp,"Compute capability of the device = %d.%d\n", deviceProp.major, deviceProp.minor);
- if(deviceProp.canMapHostMemory){
- fprintf(fp,"canMapHostMemory: TRUE\n");
- }else{
- fprintf(fp,"canMapHostMemory: FALSE\n");
- }
- if(deviceProp.deviceOverlap){
- fprintf(fp,"deviceOverlap: TRUE\n");
- }else{
- fprintf(fp,"deviceOverlap: FALSE\n");
- }
+ // make sure that the device has compute capability >= 1.3
+ //if (deviceProp.major < 1){
+ // fprintf(stderr,"Compute capability major number should be at least 1, exiting...\n\n");
+ // exit_on_error("CUDA Compute capability major number should be at least 1");
+ //}
+ //if (deviceProp.major == 1 && deviceProp.minor < 3){
+ // fprintf(stderr,"Compute capability should be at least 1.3, exiting...\n");
+ // exit_on_error("CUDA Compute capability major number should be at least 1.3");
+ //}
- // make sure that the device has compute capability >= 1.3
- //if (deviceProp.major < 1){
- // fprintf(stderr,"Compute capability major number should be at least 1, exiting...\n\n");
- // exit_on_error("CUDA Compute capability major number should be at least 1");
- //}
- //if (deviceProp.major == 1 && deviceProp.minor < 3){
- // fprintf(stderr,"Compute capability should be at least 1.3, exiting...\n");
- // exit_on_error("CUDA Compute capability major number should be at least 1.3");
- //}
+ // outputs initial memory infos via cudaMemGetInfo()
+ double free_db,used_db,total_db;
+ get_free_memory(&free_db,&used_db,&total_db);
+ fprintf(fp,"%d: GPU memory usage: used = %f MB, free = %f MB, total = %f MB\n", myrank,
+ used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
- // outputs initial memory infos via cudaMemGetInfo()
- double free_db,used_db,total_db;
- get_free_memory(&free_db,&used_db,&total_db);
- fprintf(fp,"%d: GPU memory usage: used = %f MB, free = %f MB, total = %f MB\n", myrank,
- used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
-
- fclose(fp);
+ fclose(fp);
+ }
}
/* ----------------------------------------------------------------------------------------------- */
@@ -1490,13 +1497,6 @@
Mesh* mp = (Mesh*)(*Mesh_pointer_f);
- // allocates arrays for receivers
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_islice_selected_rec,
- *islice_selected_rec_size*sizeof(int)),6001);
- // copies arrays to GPU device
- print_CUDA_error_if_any(cudaMemcpy(mp->d_islice_selected_rec,islice_selected_rec,
- *islice_selected_rec_size*sizeof(int),cudaMemcpyHostToDevice),6002);
-
// adjoint source arrays
mp->nadj_rec_local = *nadj_rec_local;
if( mp->nadj_rec_local > 0 ){
@@ -1900,7 +1900,6 @@
// purely adjoint & kernel array
if( *SIMULATION_TYPE == 2 || *SIMULATION_TYPE == 3 ){
- cudaFree(mp->d_islice_selected_rec);
if(mp->nadj_rec_local > 0 ){
cudaFree(mp->d_adj_sourcearrays);
cudaFree(mp->d_pre_computed_irec);
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c 2012-02-25 01:37:33 UTC (rev 19680)
@@ -1,30 +1,30 @@
-/*
- !=====================================================================
- !
- ! S p e c f e m 3 D V e r s i o n 2 . 0
- ! ---------------------------------------
- !
- ! Main authors: Dimitri Komatitsch and Jeroen Tromp
- ! Princeton University, USA and University of Pau / CNRS / INRIA
- ! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
- ! April 2011
- !
- ! This program is free software; you can redistribute it and/or modify
- ! it under the terms of the GNU General Public License as published by
- ! the Free Software Foundation; either version 2 of the License, or
- ! (at your option) any later version.
- !
- ! This program is distributed in the hope that it will be useful,
- ! but WITHOUT ANY WARRANTY; without even the implied warranty of
- ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- ! GNU General Public License for more details.
- !
- ! You should have received a copy of the GNU General Public License along
- ! with this program; if not, write to the Free Software Foundation, Inc.,
- ! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- !
- !=====================================================================
- */
+/*
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+*/
#include <stdio.h>
#include <stdlib.h>
@@ -33,101 +33,72 @@
typedef float realw;
-/* from check_fields_cuda.cu */
+
+
+//
+// src/cuda/check_fields_cuda.cu
+//
+
void FC_FUNC_(check_max_norm_displ_gpu,
- CHECK_MAX_NORM_DISPL_GPU)(int* size, realw* displ,long* Mesh_pointer_f,int* announceID){}
+ CHECK_MAX_NORM_DISPL_GPU)(int* size, realw* displ,long* Mesh_pointer_f,int* announceID) {}
void FC_FUNC_(check_max_norm_vector,
- CHECK_MAX_NORM_VECTOR)(int* size, realw* vector1, int* announceID){}
+ CHECK_MAX_NORM_VECTOR)(int* size, realw* vector1, int* announceID) {}
+
void FC_FUNC_(check_max_norm_displ,
- CHECK_MAX_NORM_DISPL)(int* size, realw* displ, int* announceID){}
+ CHECK_MAX_NORM_DISPL)(int* size, realw* displ, int* announceID) {}
void FC_FUNC_(check_max_norm_b_displ_gpu,
- CHECK_MAX_NORM_B_DISPL_GPU)(int* size, realw* b_displ,long* Mesh_pointer_f,int* announceID){}
+ CHECK_MAX_NORM_B_DISPL_GPU)(int* size, realw* b_displ,long* Mesh_pointer_f,int* announceID) {}
void FC_FUNC_(check_max_norm_b_accel_gpu,
- CHECK_MAX_NORM_B_ACCEL_GPU)(int* size, realw* b_accel,long* Mesh_pointer_f,int* announceID){}
+ CHECK_MAX_NORM_B_ACCEL_GPU)(int* size, realw* b_accel,long* Mesh_pointer_f,int* announceID) {}
void FC_FUNC_(check_max_norm_b_veloc_gpu,
- CHECK_MAX_NORM_B_VELOC_GPU)(int* size, realw* b_veloc,long* Mesh_pointer_f,int* announceID){}
+ CHECK_MAX_NORM_B_VELOC_GPU)(int* size, realw* b_veloc,long* Mesh_pointer_f,int* announceID) {}
void FC_FUNC_(check_max_norm_b_displ,
- CHECK_MAX_NORM_B_DISPL)(int* size, realw* b_displ,int* announceID){}
+ CHECK_MAX_NORM_B_DISPL)(int* size, realw* b_displ,int* announceID) {}
void FC_FUNC_(check_max_norm_b_accel,
- CHECK_MAX_NORM_B_ACCEL)(int* size, realw* b_accel,int* announceID){}
+ CHECK_MAX_NORM_B_ACCEL)(int* size, realw* b_accel,int* announceID) {}
void FC_FUNC_(check_error_vectors,
- CHECK_ERROR_VECTORS)(int* sizef, realw* vector1,realw* vector2){}
+ CHECK_ERROR_VECTORS)(int* sizef, realw* vector1,realw* vector2) {}
void FC_FUNC_(get_max_accel,
- GET_MAX_ACCEL)(int* itf,int* sizef,long* Mesh_pointer){}
+ GET_MAX_ACCEL)(int* itf,int* sizef,long* Mesh_pointer) {}
void FC_FUNC_(get_norm_acoustic_from_device,
GET_NORM_ACOUSTIC_FROM_DEVICE)(realw* norm,
long* Mesh_pointer_f,
- int* SIMULATION_TYPE){}
+ int* SIMULATION_TYPE) {}
void FC_FUNC_(get_norm_elastic_from_device,
GET_NORM_ELASTIC_FROM_DEVICE)(realw* norm,
long* Mesh_pointer_f,
- int* SIMULATION_TYPE){}
+ int* SIMULATION_TYPE) {}
-/* from file compute_add_sources_elastic_cuda.cu */
+//
+// src/cuda/compute_add_sources_acoustic_cuda.cu
+//
-void FC_FUNC_(compute_add_sources_el_cuda,
- COMPUTE_ADD_SOURCES_EL_CUDA)(long* Mesh_pointer_f,
- int* phase_is_innerf,
- int* NSOURCESf,
- double* h_stf_pre_compute,
- int* myrankf){}
-
-void FC_FUNC_(compute_add_sources_el_s3_cuda,
- COMPUTE_ADD_SOURCES_EL_S3_CUDA)(long* Mesh_pointer,
- double* h_stf_pre_compute,
- int* NSOURCES,
- int* phase_is_inner,
- int* myrank){}
-
-void FC_FUNC_(add_source_master_rec_noise_cu,
- ADD_SOURCE_MASTER_REC_NOISE_CU)(long* Mesh_pointer_f,
- int* myrank_f,
- int* it_f,
- int* irec_master_noise_f,
- int* islice_selected_rec){}
-
-void FC_FUNC_(add_sources_el_sim_type_2_or_3,
- ADD_SOURCES_EL_SIM_TYPE_2_OR_3)(long* Mesh_pointer,
- realw* h_adj_sourcearrays,
- int* phase_is_inner,
- int* h_ispec_is_inner,
- int* h_ispec_is_elastic,
- int* h_ispec_selected_rec,
- int* myrank,
- int* nrec,
- int* time_index,
- int* h_islice_selected_rec,
- int* nadj_rec_local,
- int* NTSTEP_BETWEEN_READ_ADJSRC){}
-
-/* from file compute_add_sources_acoustic_cuda.cu */
-
void FC_FUNC_(compute_add_sources_ac_cuda,
COMPUTE_ADD_SOURCES_AC_CUDA)(long* Mesh_pointer_f,
int* phase_is_innerf,
int* NSOURCESf,
int* SIMULATION_TYPEf,
double* h_stf_pre_compute,
- int* myrankf){}
+ int* myrankf) {}
void FC_FUNC_(compute_add_sources_ac_s3_cuda,
- COMPUTE_ADD_SOURCES_AC_S3_CUDA)(long* Mesh_pointer_f,
+ COMPUTE_ADD_SOURCES_AC_s3_CUDA)(long* Mesh_pointer_f,
int* phase_is_innerf,
int* NSOURCESf,
int* SIMULATION_TYPEf,
double* h_stf_pre_compute,
- int* myrankf){}
+ int* myrankf) {}
void FC_FUNC_(add_sources_ac_sim_2_or_3_cuda,
ADD_SOURCES_AC_SIM_2_OR_3_CUDA)(long* Mesh_pointer,
@@ -141,26 +112,70 @@
int* time_index,
int* h_islice_selected_rec,
int* nadj_rec_local,
- int* NTSTEP_BETWEEN_ADJSRC){}
+ int* NTSTEP_BETWEEN_READ_ADJSRC) {}
-/* from compute_coupling_cuda.cu */
-void FC_FUNC_(compute_coupling_ac_el_cuda,
- COMPUTE_COUPLING_AC_EL_CUDA)(
- long* Mesh_pointer_f,
+//
+// src/cuda/compute_add_sources_elastic_cuda.cu
+//
+
+void FC_FUNC_(compute_add_sources_el_cuda,
+ COMPUTE_ADD_SOURCES_EL_CUDA)(long* Mesh_pointer_f,
int* phase_is_innerf,
- int* num_coupling_ac_el_facesf,
- int* SIMULATION_TYPEf){}
+ int* NSOURCESf,
+ double* h_stf_pre_compute,
+ int* myrankf) {}
+void FC_FUNC_(compute_add_sources_el_s3_cuda,
+ COMPUTE_ADD_SOURCES_EL_S3_CUDA)(long* Mesh_pointer,
+ double* h_stf_pre_compute,
+ int* NSOURCESf,
+ int* phase_is_inner,
+ int* myrank) {}
+
+void FC_FUNC_(add_source_master_rec_noise_cu,
+ ADD_SOURCE_MASTER_REC_NOISE_CU)(long* Mesh_pointer_f,
+ int* myrank_f,
+ int* it_f,
+ int* irec_master_noise_f,
+ int* islice_selected_rec) {}
+
+void FC_FUNC_(add_sources_el_sim_type_2_or_3,
+ ADD_SOURCES_EL_SIM_TYPE_2_OR_3)(long* Mesh_pointer,
+ realw* h_adj_sourcearrays,
+ int* phase_is_inner,
+ int* h_ispec_is_inner,
+ int* h_ispec_is_elastic,
+ int* h_ispec_selected_rec,
+ int* myrank,
+ int* nrec,
+ int* time_index,
+ int* h_islice_selected_rec,
+ int* nadj_rec_local,
+ int* NTSTEP_BETWEEN_READ_ADJSRC) {}
+
+
+//
+// src/cuda/compute_coupling_cuda.cu
+//
+
+void FC_FUNC_(compute_coupling_ac_el_cuda,
+ COMPUTE_COUPLING_AC_EL_CUDA)(long* Mesh_pointer_f,
+ int* phase_is_innerf,
+ int* num_coupling_ac_el_facesf,
+ int* SIMULATION_TYPEf) {}
+
void FC_FUNC_(compute_coupling_el_ac_cuda,
- COMPUTE_COUPLING_EL_AC_CUDA)(
- long* Mesh_pointer_f,
- int* phase_is_innerf,
- int* num_coupling_ac_el_facesf,
- int* SIMULATION_TYPEf){}
+ COMPUTE_COUPLING_EL_AC_CUDA)(long* Mesh_pointer_f,
+ int* phase_is_innerf,
+ int* num_coupling_ac_el_facesf,
+ int* SIMULATION_TYPEf) {}
-/* from compute_forces_acoustic_cuda.cu */
+//
+// src/cuda/compute_forces_acoustic_cuda.cu
+//
+
void FC_FUNC_(transfer_boun_pot_from_device,
TRANSFER_BOUN_POT_FROM_DEVICE)(
int* size,
@@ -171,7 +186,7 @@
int* max_nibool_interfaces_ext_mesh,
int* nibool_interfaces_ext_mesh,
int* ibool_interfaces_ext_mesh,
- int* FORWARD_OR_ADJOINT){}
+ int* FORWARD_OR_ADJOINT){}
void FC_FUNC_(transfer_asmbl_pot_to_device,
TRANSFER_ASMBL_POT_TO_DEVICE)(
@@ -182,52 +197,54 @@
int* max_nibool_interfaces_ext_mesh,
int* nibool_interfaces_ext_mesh,
int* ibool_interfaces_ext_mesh,
- int* FORWARD_OR_ADJOINT){}
+ int* FORWARD_OR_ADJOINT) {}
void FC_FUNC_(compute_forces_acoustic_cuda,
COMPUTE_FORCES_ACOUSTIC_CUDA)(long* Mesh_pointer_f,
int* iphase,
int* nspec_outer_acoustic,
int* nspec_inner_acoustic,
- int* SIMULATION_TYPE){}
+ int* SIMULATION_TYPE) {}
void FC_FUNC_(kernel_3_a_acoustic_cuda,KERNEL_3_ACOUSTIC_CUDA)(
long* Mesh_pointer,
int* size_F,
- int* SIMULATION_TYPE){}
+ int* SIMULATION_TYPE) {}
void FC_FUNC_(kernel_3_b_acoustic_cuda,KERNEL_3_ACOUSTIC_CUDA)(
long* Mesh_pointer,
int* size_F,
realw* deltatover2_F,
int* SIMULATION_TYPE,
- realw* b_deltatover2_F){}
+ realw* b_deltatover2_F) {}
void FC_FUNC_(acoustic_enforce_free_surf_cuda,
ACOUSTIC_ENFORCE_FREE_SURF_CUDA)(long* Mesh_pointer_f,
int* SIMULATION_TYPE,
- int* ABSORB_FREE_SURFACE){}
+ int* ABSORB_FREE_SURFACE) {}
-/* from compute_forces_elastic_cuda.cu */
+//
+// src/cuda/compute_forces_elastic_cuda.cu
+//
+
void FC_FUNC_(transfer_boun_accel_from_device,
TRANSFER_BOUN_ACCEL_FROM_DEVICE)(int* size, long* Mesh_pointer_f, realw* accel,
- realw* send_accel_buffer,
- int* num_interfaces_ext_mesh,
- int* max_nibool_interfaces_ext_mesh,
- int* nibool_interfaces_ext_mesh,
- int* ibool_interfaces_ext_mesh,
- int* FORWARD_OR_ADJOINT){}
+ realw* send_accel_buffer,
+ int* num_interfaces_ext_mesh,
+ int* max_nibool_interfaces_ext_mesh,
+ int* nibool_interfaces_ext_mesh,
+ int* ibool_interfaces_ext_mesh,
+ int* FORWARD_OR_ADJOINT){}
-
void FC_FUNC_(transfer_asmbl_accel_to_device,
- TRANSFER_ASMBL_ACCEL_TO_DEVICE)(long* Mesh_pointer,
- realw* accel,
- realw* buffer_recv_vector_ext_mesh,
- int* num_interfaces_ext_mesh,
- int* max_nibool_interfaces_ext_mesh,
- int* nibool_interfaces_ext_mesh,
- int* ibool_interfaces_ext_mesh,int* FORWARD_OR_ADJOINT){}
+ TRANSFER_ASMBL_ACCEL_TO_DEVICE)(long* Mesh_pointer, realw* accel,
+ realw* buffer_recv_vector_ext_mesh,
+ int* num_interfaces_ext_mesh,
+ int* max_nibool_interfaces_ext_mesh,
+ int* nibool_interfaces_ext_mesh,
+ int* ibool_interfaces_ext_mesh,
+ int* FORWARD_OR_ADJOINT) {}
void FC_FUNC_(compute_forces_elastic_cuda,
COMPUTE_FORCES_ELASTIC_CUDA)(long* Mesh_pointer_f,
@@ -237,7 +254,7 @@
int* SIMULATION_TYPE,
int* COMPUTE_AND_STORE_STRAIN,
int* ATTENUATION,
- int* ANISOTROPY){}
+ int* ANISOTROPY) {}
void FC_FUNC_(kernel_3_a_cuda,
KERNEL_3_A_CUDA)(long* Mesh_pointer,
@@ -245,62 +262,76 @@
realw* deltatover2_F,
int* SIMULATION_TYPE_f,
realw* b_deltatover2_F,
- int* OCEANS){}
+ int* OCEANS) {}
void FC_FUNC_(kernel_3_b_cuda,
KERNEL_3_B_CUDA)(long* Mesh_pointer,
int* size_F,
realw* deltatover2_F,
int* SIMULATION_TYPE_f,
- realw* b_deltatover2_F){}
+ realw* b_deltatover2_F) {}
void FC_FUNC_(elastic_ocean_load_cuda,
ELASTIC_OCEAN_LOAD_CUDA)(long* Mesh_pointer_f,
- int* SIMULATION_TYPE){}
+ int* SIMULATION_TYPE) {}
-/* from file compute_kernels_cuda.cu */
+//
+// src/cuda/compute_kernels_cuda.cu
+//
+
void FC_FUNC_(compute_kernels_elastic_cuda,
COMPUTE_KERNELS_ELASTIC_CUDA)(long* Mesh_pointer,
- realw* deltat_f){}
+ realw* deltat_f) {}
void FC_FUNC_(compute_kernels_strgth_noise_cu,
COMPUTE_KERNELS_STRGTH_NOISE_CU)(long* Mesh_pointer,
- realw* h_noise_surface_movie,
- realw* deltat){}
+ realw* h_noise_surface_movie,
+ realw* deltat) {}
void FC_FUNC_(compute_kernels_acoustic_cuda,
COMPUTE_KERNELS_ACOUSTIC_CUDA)(
long* Mesh_pointer,
- realw* deltat_f){}
+ realw* deltat_f) {}
void FC_FUNC_(compute_kernels_hess_cuda,
COMPUTE_KERNELS_HESS_CUDA)(long* Mesh_pointer,
- realw* deltat_f) {}
+ realw* deltat_f,
+ int* ELASTIC_SIMULATION,
+ int* ACOUSTIC_SIMULATION) {}
-/* from file compute_stacey_acoustic_cuda.cu */
+
+//
+// src/cuda/compute_stacey_acoustic_cuda.cu
+//
+
void FC_FUNC_(compute_stacey_acoustic_cuda,
COMPUTE_STACEY_ACOUSTIC_CUDA)(
long* Mesh_pointer_f,
int* phase_is_innerf,
int* SIMULATION_TYPEf,
int* SAVE_FORWARDf,
- realw* h_b_absorb_potential){}
+ realw* h_b_absorb_potential) {}
-/* from file compute_stacey_elastic_cuda.cu */
+//
+// src/cuda/compute_stacey_elastic_cuda.cu
+//
void FC_FUNC_(compute_stacey_elastic_cuda,
COMPUTE_STACEY_ELASTIC_CUDA)(long* Mesh_pointer_f,
int* phase_is_innerf,
int* SIMULATION_TYPEf,
int* SAVE_FORWARDf,
- realw* h_b_absorb_field){}
+ realw* h_b_absorb_field) {}
-/* from file it_update_displacement_cuda.cu */
+//
+// src/cuda/it_update_displacement_cuda.cu
+//
+
void FC_FUNC_(it_update_displacement_cuda,
- it_update_displacement_cuda)(long* Mesh_pointer_f,
+ IT_UPDATE_DISPLACMENT_CUDA)(long* Mesh_pointer_f,
int* size_F,
realw* deltat_F,
realw* deltatsqover2_F,
@@ -308,55 +339,64 @@
int* SIMULATION_TYPE,
realw* b_deltat_F,
realw* b_deltatsqover2_F,
- realw* b_deltatover2_F){}
+ realw* b_deltatover2_F) {}
void FC_FUNC_(it_update_displacement_ac_cuda,
- IT_UPDATE_DISPLACEMENT_AC_CUDA)(long* Mesh_pointer_f,
- int* size_F,
- realw* deltat_F,
- realw* deltatsqover2_F,
- realw* deltatover2_F,
- int* SIMULATION_TYPE,
- realw* b_deltat_F,
- realw* b_deltatsqover2_F,
- realw* b_deltatover2_F){}
+ it_update_displacement_ac_cuda)(long* Mesh_pointer_f,
+ int* size_F,
+ realw* deltat_F,
+ realw* deltatsqover2_F,
+ realw* deltatover2_F,
+ int* SIMULATION_TYPE,
+ realw* b_deltat_F,
+ realw* b_deltatsqover2_F,
+ realw* b_deltatover2_F) {}
-/* from file noise_tomography_cuda.cu */
-void FC_FUNC_(fortranflush,FORTRANFLUSH)(int* rank){}
-void FC_FUNC_(fortranprint,FORTRANPRINT)(int* id){}
+//
+// src/cuda/noise_tomography_cuda.cu
+//
-void FC_FUNC_(fortranprintf,FORTRANPRINTF)(realw* val){}
+void FC_FUNC_(fortranflush,FORTRANFLUSH)(int* rank){}
-void FC_FUNC_(fortranprintd,FORTRANPRINTD)(double* val){}
+void FC_FUNC_(fortranprint,FORTRANPRINT)(int* id) {}
-void FC_FUNC_(make_displ_rand,MAKE_DISPL_RAND)(long* Mesh_pointer_f,realw* h_displ){}
+void FC_FUNC_(fortranprintf,FORTRANPRINTF)(realw* val) {}
+void FC_FUNC_(fortranprintd,FORTRANPRINTD)(double* val) {}
+
+void FC_FUNC_(make_displ_rand,MAKE_DISPL_RAND)(long* Mesh_pointer_f,realw* h_displ) {}
+
void FC_FUNC_(transfer_surface_to_host,
TRANSFER_SURFACE_TO_HOST)(long* Mesh_pointer_f,
- realw* h_noise_surface_movie){}
+ realw* h_noise_surface_movie) {}
void FC_FUNC_(noise_read_add_surface_movie_cu,
NOISE_READ_ADD_SURFACE_MOVIE_CU)(long* Mesh_pointer_f,
realw* h_noise_surface_movie,
- int* NOISE_TOMOGRAPHYf){}
+ int* NOISE_TOMOGRAPHYf) {}
-/* from file prepare_mesh_constants_cuda.cu */
+//
+// src/cuda/prepare_mesh_constants_cuda.cu
+//
-void FC_FUNC_(pause_for_debug,PAUSE_FOR_DEBUG)(){}
+void FC_FUNC_(pause_for_debug,PAUSE_FOR_DEBUG)() {}
+
void FC_FUNC_(output_free_device_memory,
- OUTPUT_FREE_DEVICE_MEMORY)(int* myrank){}
+ OUTPUT_FREE_DEVICE_MEMORY)(int* myrank) {}
void FC_FUNC_(show_free_device_memory,
- SHOW_FREE_DEVICE_MEMORY)(){}
+ SHOW_FREE_DEVICE_MEMORY)() {}
void FC_FUNC_(get_free_device_memory,
- get_FREE_DEVICE_MEMORY)(realw* free, realw* used, realw* total ){}
+ get_FREE_DEVICE_MEMORY)(realw* free, realw* used, realw* total ) {}
-
void FC_FUNC_(prepare_cuda_device,
- PREPARE_CUDA_DEVICE)(int* myrank_f,int* ncuda_devices){}
+ PREPARE_CUDA_DEVICE)(int* myrank_f,int* ncuda_devices) {
+ fprintf(stderr,"ERROR: GPU_MODE enabled without GPU/CUDA Support. To enable GPU support, reconfigure with --with-cuda flag.\n");
+ exit(1);
+}
void FC_FUNC_(prepare_constants_device,
PREPARE_CONSTANTS_DEVICE)(long* Mesh_pointer,
@@ -367,8 +407,10 @@
realw* h_gammax, realw* h_gammay, realw* h_gammaz,
realw* h_kappav, realw* h_muv,
int* h_ibool,
- int* num_interfaces_ext_mesh, int* max_nibool_interfaces_ext_mesh,
- int* h_nibool_interfaces_ext_mesh, int* h_ibool_interfaces_ext_mesh,
+ int* num_interfaces_ext_mesh,
+ int* max_nibool_interfaces_ext_mesh,
+ int* h_nibool_interfaces_ext_mesh,
+ int* h_ibool_interfaces_ext_mesh,
realw* h_hprime_xx,realw* h_hprime_yy,realw* h_hprime_zz,
realw* h_hprimewgll_xx,realw* h_hprimewgll_yy,realw* h_hprimewgll_zz,
realw* h_wgllwgll_xy,realw* h_wgllwgll_xz,realw* h_wgllwgll_yz,
@@ -379,7 +421,7 @@
int* h_num_abs_boundary_faces,
int* h_ispec_is_inner,
int* NSOURCES,
- int* nsources_local,
+ int* nsources_local_f,
realw* h_sourcearrays,
int* h_islice_selected_source,
int* h_ispec_selected_source,
@@ -388,22 +430,9 @@
int* nrec_f,
int* nrec_local_f,
int* SIMULATION_TYPE,
- int* USE_MESH_COLORING_GPU,
- int* nspec_acoustic,int* nspec_elastic)
-{
- fprintf(stderr,"ERROR: GPU_MODE enabled without GPU/CUDA Support. To enable GPU support, reconfigure with --with-cuda flag.\n");
- exit(1);
-}
+ int* USE_MESH_COLORING_GPU_f,
+ int* nspec_acoustic,int* nspec_elastic) {}
-void FC_FUNC_(prepare_sim2_or_3_const_device,
- PREPARE_SIM2_OR_3_CONST_DEVICE)(
- long* Mesh_pointer_f,
- int* islice_selected_rec,
- int* islice_selected_rec_size,
- int* nadj_rec_local,
- int* nrec,
- int* myrank){}
-
void FC_FUNC_(prepare_fields_acoustic_device,
PREPARE_FIELDS_ACOUSTIC_DEVICE)(long* Mesh_pointer_f,
realw* rmass_acoustic,
@@ -427,12 +456,13 @@
realw* coupling_ac_el_jacobian2Dw,
int* num_colors_outer_acoustic,
int* num_colors_inner_acoustic,
- int* num_elem_colors_acoustic){}
+ int* num_elem_colors_acoustic) {}
void FC_FUNC_(prepare_fields_acoustic_adj_dev,
PREPARE_FIELDS_ACOUSTIC_ADJ_DEV)(long* Mesh_pointer_f,
- int* SIMULATION_TYPE,
- int* APPROXIMATE_HESS_KL) {}
+ int* SIMULATION_TYPE,
+ int* APPROXIMATE_HESS_KL) {}
+
void FC_FUNC_(prepare_fields_elastic_device,
PREPARE_FIELDS_ELASTIC_DEVICE)(long* Mesh_pointer_f,
int* size,
@@ -465,7 +495,7 @@
int* num_colors_outer_elastic,
int* num_colors_inner_elastic,
int* num_elem_colors_elastic,
- int* ANISOTROY,
+ int* ANISOTROPY,
realw *c11store,
realw *c12store,
realw *c13store,
@@ -486,23 +516,31 @@
realw *c46store,
realw *c55store,
realw *c56store,
- realw *c66store){}
+ realw *c66store){}
void FC_FUNC_(prepare_fields_elastic_adj_dev,
PREPARE_FIELDS_ELASTIC_ADJ_DEV)(long* Mesh_pointer_f,
- int* size,
- int* SIMULATION_TYPE,
- int* COMPUTE_AND_STORE_STRAIN,
- realw* epsilon_trace_over_3,
- realw* b_epsilondev_xx,realw* b_epsilondev_yy,realw* b_epsilondev_xy,
- realw* b_epsilondev_xz,realw* b_epsilondev_yz,
- realw* b_epsilon_trace_over_3,
- int* ATTENUATION,
- int* R_size,
- realw* b_R_xx,realw* b_R_yy,realw* b_R_xy,realw* b_R_xz,realw* b_R_yz,
- realw* b_alphaval,realw* b_betaval,realw* b_gammaval,
- int* APPROXIMATE_HESS_KL){}
+ int* size,
+ int* SIMULATION_TYPE,
+ int* COMPUTE_AND_STORE_STRAIN,
+ realw* epsilon_trace_over_3,
+ realw* b_epsilondev_xx,realw* b_epsilondev_yy,realw* b_epsilondev_xy,
+ realw* b_epsilondev_xz,realw* b_epsilondev_yz,
+ realw* b_epsilon_trace_over_3,
+ int* ATTENUATION,
+ int* R_size,
+ realw* b_R_xx,realw* b_R_yy,realw* b_R_xy,realw* b_R_xz,realw* b_R_yz,
+ realw* b_alphaval,realw* b_betaval,realw* b_gammaval,
+ int* APPROXIMATE_HESS_KL){}
+void FC_FUNC_(prepare_sim2_or_3_const_device,
+ PREPARE_SIM2_OR_3_CONST_DEVICE)(
+ long* Mesh_pointer_f,
+ int* islice_selected_rec,
+ int* islice_selected_rec_size,
+ int* nadj_rec_local,
+ int* nrec,
+ int* myrank) {}
void FC_FUNC_(prepare_fields_noise_device,
PREPARE_FIELDS_NOISE_DEVICE)(long* Mesh_pointer_f,
@@ -518,8 +556,17 @@
realw* normal_y_noise,
realw* normal_z_noise,
realw* mask_noise,
- realw* free_surface_jacobian2Dw){}
+ realw* free_surface_jacobian2Dw) {}
+void FC_FUNC_(prepare_fields_gravity_device,
+ PREPARE_FIELDS_gravity_DEVICE)(long* Mesh_pointer_f,
+ int* GRAVITY,
+ realw* minus_deriv_gravity,
+ realw* minus_g,
+ realw* h_wgll_cube,
+ int* ACOUSTIC_SIMULATION,
+ realw* rhostore) {}
+
void FC_FUNC_(prepare_cleanup_device,
PREPARE_CLEANUP_DEVICE)(long* Mesh_pointer_f,
int* SIMULATION_TYPE,
@@ -532,57 +579,50 @@
int* ATTENUATION,
int* ANISOTROPY,
int* OCEANS,
- int* APPROXIMATE_HESS_KL){}
+ int* APPROXIMATE_HESS_KL) {}
-void FC_FUNC_(prepare_fields_gravity_device,
- PREPARE_FIELDS_gravity_DEVICE)(long* Mesh_pointer_f,
- int* GRAVITY,
- realw* minus_deriv_gravity,
- realw* minus_g,
- realw* h_wgll_cube,
- int* ACOUSTIC_SIMULATION,
- realw* rhostore){}
-/* from file transfer_fields_cuda.cu */
+//
+// src/cuda/transfer_fields_cuda.cu
+//
+
void FC_FUNC_(transfer_fields_el_to_device,
- TRANSFER_FIELDS_EL_TO_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f){}
+ TRANSFER_FIELDS_EL_TO_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_fields_el_from_device,
- TRANSFER_FIELDS_EL_FROM_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f){}
+ TRANSFER_FIELDS_EL_FROM_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_b_fields_to_device,
TRANSFER_B_FIELDS_TO_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,
- long* Mesh_pointer_f){}
+ long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_b_fields_from_device,
- TRANSFER_B_FIELDS_FROM_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,long* Mesh_pointer_f){}
+ TRANSFER_B_FIELDS_FROM_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_accel_to_device,
- TRNASFER_ACCEL_TO_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f){}
+ TRNASFER_ACCEL_TO_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_accel_from_device,
- TRANSFER_ACCEL_FROM_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f){}
+ TRANSFER_ACCEL_FROM_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {}
+
void FC_FUNC_(transfer_b_accel_from_device,
- TRNASFER_B_ACCEL_FROM_DEVICE)(int* size, realw* b_accel,long* Mesh_pointer_f){}
+ TRNASFER_B_ACCEL_FROM_DEVICE)(int* size, realw* b_accel,long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_sigma_from_device,
- TRANSFER_SIGMA_FROM_DEVICE)(int* size, realw* sigma_kl,long* Mesh_pointer_f){}
+ TRANSFER_SIGMA_FROM_DEVICE)(int* size, realw* sigma_kl,long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_b_displ_from_device,
- TRANSFER_B_DISPL_FROM_DEVICE)(int* size, realw* displ,long* Mesh_pointer_f){}
+ TRANSFER_B_DISPL_FROM_DEVICE)(int* size, realw* displ,long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_displ_from_device,
- TRANSFER_DISPL_FROM_DEVICE)(int* size, realw* displ,long* Mesh_pointer_f){}
+ TRANSFER_DISPL_FROM_DEVICE)(int* size, realw* displ,long* Mesh_pointer_f) {}
-/*
void FC_FUNC_(transfer_compute_kernel_answers_from_device,
TRANSFER_COMPUTE_KERNEL_ANSWERS_FROM_DEVICE)(long* Mesh_pointer,
realw* rho_kl,int* size_rho,
realw* mu_kl, int* size_mu,
- realw* kappa_kl, int* size_kappa){}
-*/
+ realw* kappa_kl, int* size_kappa) {}
-/*
void FC_FUNC_(transfer_compute_kernel_fields_from_device,
TRANSFER_COMPUTE_KERNEL_FIELDS_FROM_DEVICE)(long* Mesh_pointer,
realw* accel, int* size_accel,
@@ -604,8 +644,7 @@
realw* kappa_kl, int* size_kappa,
realw* epsilon_trace_over_3,
realw* b_epsilon_trace_over_3,
- int* size_epsilon_trace_over_3) {}
-*/
+ int* size_epsilon_trace_over_3) {}
void FC_FUNC_(transfer_b_fields_att_to_device,
TRANSFER_B_FIELDS_ATT_TO_DEVICE)(long* Mesh_pointer,
@@ -616,7 +655,7 @@
realw* b_epsilondev_xy,
realw* b_epsilondev_xz,
realw* b_epsilondev_yz,
- int* size_epsilondev){}
+ int* size_epsilondev) {}
void FC_FUNC_(transfer_fields_att_from_device,
TRANSFER_FIELDS_ATT_FROM_DEVICE)(long* Mesh_pointer,
@@ -627,28 +666,27 @@
realw* epsilondev_xy,
realw* epsilondev_xz,
realw* epsilondev_yz,
- int* size_epsilondev){}
+ int* size_epsilondev) {}
void FC_FUNC_(transfer_kernels_el_to_host,
TRANSFER_KERNELS_EL_TO_HOST)(long* Mesh_pointer,
realw* h_rho_kl,
realw* h_mu_kl,
realw* h_kappa_kl,
- int* NSPEC_AB){}
+ int* NSPEC_AB) {}
void FC_FUNC_(transfer_kernels_noise_to_host,
TRANSFER_KERNELS_NOISE_TO_HOST)(long* Mesh_pointer,
- realw* h_Sigma_kl,
- int* NSPEC_AB){}
+ realw* h_Sigma_kl,
+ int* NSPEC_AB) {}
-
void FC_FUNC_(transfer_fields_ac_to_device,
TRANSFER_FIELDS_AC_TO_DEVICE)(
int* size,
realw* potential_acoustic,
realw* potential_dot_acoustic,
realw* potential_dot_dot_acoustic,
- long* Mesh_pointer_f){}
+ long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_b_fields_ac_to_device,
TRANSFER_B_FIELDS_AC_TO_DEVICE)(
@@ -656,14 +694,14 @@
realw* b_potential_acoustic,
realw* b_potential_dot_acoustic,
realw* b_potential_dot_dot_acoustic,
- long* Mesh_pointer_f){}
+ long* Mesh_pointer_f) {}
-void FC_FUNC_(transfer_fields_ac_from_device,TRANSFER_FIELDS_AC_FROM_DEVICE)(
- int* size,
- realw* potential_acoustic,
- realw* potential_dot_acoustic,
- realw* potential_dot_dot_acoustic,
- long* Mesh_pointer_f){}
+void FC_FUNC_(transfer_fields_ac_from_device,
+ TRANSFER_FIELDS_AC_FROM_DEVICE)(int* size,
+ realw* potential_acoustic,
+ realw* potential_dot_acoustic,
+ realw* potential_dot_dot_acoustic,
+ long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_b_fields_ac_from_device,
TRANSFER_B_FIELDS_AC_FROM_DEVICE)(
@@ -671,37 +709,41 @@
realw* b_potential_acoustic,
realw* b_potential_dot_acoustic,
realw* b_potential_dot_dot_acoustic,
- long* Mesh_pointer_f){}
+ long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_dot_dot_from_device,
- TRNASFER_DOT_DOT_FROM_DEVICE)(int* size, realw* potential_dot_dot_acoustic,long* Mesh_pointer_f){}
+ TRNASFER_DOT_DOT_FROM_DEVICE)(int* size, realw* potential_dot_dot_acoustic,long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_b_dot_dot_from_device,
- TRNASFER_B_DOT_DOT_FROM_DEVICE)(int* size, realw* b_potential_dot_dot_acoustic,long* Mesh_pointer_f){}
+ TRNASFER_B_DOT_DOT_FROM_DEVICE)(int* size, realw* b_potential_dot_dot_acoustic,long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_kernels_ac_to_host,
TRANSFER_KERNELS_AC_TO_HOST)(long* Mesh_pointer,
realw* h_rho_ac_kl,
realw* h_kappa_ac_kl,
- int* NSPEC_AB){}
+ int* NSPEC_AB) {}
void FC_FUNC_(transfer_kernels_hess_el_tohost,
- TRANSFER_KERNELS_HESS_TO_HOST)(long* Mesh_pointer,
- realw* h_hess_kl,
- int* NSPEC_AB) {}
+ TRANSFER_KERNELS_HESS_EL_TOHOST)(long* Mesh_pointer,
+ realw* h_hess_kl,
+ int* NSPEC_AB) {}
+
void FC_FUNC_(transfer_kernels_hess_ac_tohost,
- TRANSFER_KERNELS_HESS_TO_HOST)(long* Mesh_pointer,
+ TRANSFER_KERNELS_HESS_AC_TOHOST)(long* Mesh_pointer,
realw* h_hess_ac_kl,
- int* NSPEC_AB) {}
+ int* NSPEC_AB) {}
-/* from file write_seismograms_cuda.cu */
+//
+// src/cuda/write_seismograms_cuda.cu
+//
+
void FC_FUNC_(transfer_station_el_from_device,
TRANSFER_STATION_EL_FROM_DEVICE)(realw* displ,realw* veloc,realw* accel,
realw* b_displ, realw* b_veloc, realw* b_accel,
long* Mesh_pointer_f,int* number_receiver_global,
int* ispec_selected_rec,int* ispec_selected_source,
- int* ibool,int* SIMULATION_TYPEf){}
+ int* ibool,int* SIMULATION_TYPEf) {}
void FC_FUNC_(transfer_station_ac_from_device,
TRANSFER_STATION_AC_FROM_DEVICE)(
@@ -716,7 +758,5 @@
int* ispec_selected_rec,
int* ispec_selected_source,
int* ibool,
- int* SIMULATION_TYPEf){}
+ int* SIMULATION_TYPEf) {}
-
-
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/transfer_fields_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/transfer_fields_cuda.cu 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/transfer_fields_cuda.cu 2012-02-25 01:37:33 UTC (rev 19680)
@@ -180,7 +180,7 @@
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
- print_CUDA_error_if_any(cudaMemcpy(displ,mp->d_displ,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40056);
+ print_CUDA_error_if_any(cudaMemcpy(displ,mp->d_b_displ,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40056);
}
@@ -448,12 +448,12 @@
/* ----------------------------------------------------------------------------------------------- */
extern "C"
-void FC_FUNC_(transfer_fields_ac_from_device,TRANSFER_FIELDS_AC_FROM_DEVICE)(
- int* size,
- realw* potential_acoustic,
- realw* potential_dot_acoustic,
- realw* potential_dot_dot_acoustic,
- long* Mesh_pointer_f) {
+void FC_FUNC_(transfer_fields_ac_from_device,
+ TRANSFER_FIELDS_AC_FROM_DEVICE)(int* size,
+ realw* potential_acoustic,
+ realw* potential_dot_acoustic,
+ realw* potential_dot_dot_acoustic,
+ long* Mesh_pointer_f) {
TRACE("transfer_fields_ac_from_device");
Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_perm_color.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_perm_color.f90 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_perm_color.f90 2012-02-25 01:37:33 UTC (rev 19680)
@@ -129,11 +129,11 @@
logical :: fail_safe
! valence
integer :: maxval_count_ibool_outer,maxval_count_ibool_inner
-
+
! display absolute minimum possible number of colors, i.e., maximum valence (for information only)
! beware: this wastes memory (needs an additional array called "count_ibool")
logical, parameter :: DISPLAY_MIN_POSSIBLE_COLORS = .false.
-
+
! user output
if( myrank == 0 ) then
if( USE_DROUX_OPTIMIZATION ) then
@@ -150,7 +150,7 @@
nspec_inner = 0
nspec_domain = 0
do ispec=1,nspec
- ! domain elements
+ ! domain elements
if(ispec_is_d(ispec)) then
! outer/inner elements
if(is_on_a_slice_edge(ispec)) then
@@ -189,13 +189,13 @@
! gets maximum values of valence for inner and outer element points
call count_mesh_valence(ibool,is_on_a_slice_edge,ispec_is_d, &
myrank, nspec, nglob, &
- maxval_count_ibool_outer,maxval_count_ibool_inner)
- endif
+ maxval_count_ibool_outer,maxval_count_ibool_inner)
+ endif
! allocates mask
allocate(mask_ibool(nglob),stat=ier)
if( ier /= 0 ) stop 'error allocating mask_ibool array'
-
+
! entry point for fail-safe mechanism when Droux 1993 fails
999 continue
@@ -338,21 +338,21 @@
nb_colors_inner_elements = icolor - nb_colors_outer_elements
- ! Droux optimization:
- ! added this to create more balanced colors according to JJ Droux (1993)
- ! note: this might not find an optimial solution.
+ ! Droux optimization:
+ ! added this to create more balanced colors according to JJ Droux (1993)
+ ! note: this might not find an optimial solution.
! we will probably have to try a few times with increasing colors
if( try_Droux_coloring ) then
! initializes fail-safe mechanism
fail_safe = .false.
-
+
! tries to find a balanced coloring
call balance_colors_Droux(ibool,is_on_a_slice_edge,ispec_is_d, &
myrank, nspec, nglob, &
color,nb_colors_outer_elements,nb_colors_inner_elements, &
nspec_outer,nspec_inner,maxval_count_ibool_inner, &
mask_ibool,fail_safe)
-
+
! in case it fails go back to simple coloring algorithm
if( fail_safe ) then
try_Droux_coloring = .false.
@@ -445,7 +445,7 @@
integer :: ispec
integer :: iglob1,iglob2,iglob3,iglob4,iglob5,iglob6,iglob7,iglob8
integer :: ier
-
+
! allocates count array
allocate(count_ibool(nglob),stat=ier)
if( ier /= 0 ) stop 'error allocating count_ibool array'
@@ -521,7 +521,7 @@
endif
deallocate(count_ibool)
-
+
end subroutine count_mesh_valence
!
@@ -554,17 +554,17 @@
integer :: maxval_count_ibool_inner
logical :: fail_safe
-
+
! local parameters
logical, dimension(:), allocatable :: icolor_conflict_found
integer, dimension(:), allocatable :: nb_elems_in_this_color
integer :: ispec,ispec2,icolor,ncolors,icolormin,icolormax,icolor_chosen,nb_elems_in_color_chosen
integer :: nb_tries_of_Droux_1993,last_ispec_studied
integer :: ier
-
+
! debug outupt
if( myrank == 0 ) then
- write(IMAIN,*) ' balancing colors: Droux algorithm'
+ write(IMAIN,*) ' balancing colors: Droux algorithm'
write(IMAIN,*) ' initial number of outer element colors = ',nb_colors_outer_elements
write(IMAIN,*) ' initial number of inner element colors = ',nb_colors_inner_elements
write(IMAIN,*) ' initial number of total colors = ',nb_colors_outer_elements + nb_colors_inner_elements
@@ -655,7 +655,7 @@
mask_ibool(ibool(NGLLX,NGLLY,NGLLZ,ispec2)) .or. &
mask_ibool(ibool(1,NGLLY,NGLLZ,ispec2))) &
icolor_conflict_found(color(ispec2)) = .true.
-
+
endif ! domain elements
enddo
endif
@@ -745,7 +745,7 @@
integer :: last_ispec_studied
integer :: target_nb_elems_per_color,icolor_target
integer :: ier
-
+
! debug outupt
if( myrank == 0 ) then
write(IMAIN,*) ' balancing colors: simple algorithm'
@@ -770,9 +770,9 @@
if( nb_colors_outer_elements > 0 ) then
target_nb_elems_per_color = nspec_outer / nb_colors_outer_elements + 1
else
- target_nb_elems_per_color = 1
+ target_nb_elems_per_color = 1
endif
-
+
! print *,'nspec_outer,target_nb_elems_per_color = ',nspec_outer,target_nb_elems_per_color
! count the initial number of elements in each color
@@ -782,7 +782,7 @@
enddo
! do not balance the last one, because it will be balanced automatically by the others
- do icolor = icolormin,icolormax-1
+ do icolor = icolormin,icolormax-1
! if color is already balanced, do nothing
! (this works because in the initial set the number of elements per color decreases when the color number increases)
@@ -865,7 +865,7 @@
! if cannot find any other color to move this element to
if (all(icolor_conflict_found(icolormin:icolormax))) cycle
- ! loop on all the colors to determine the color with the smallest number of elements
+ ! loop on all the colors to determine the color with the smallest number of elements
! and for which there is no conflict
nb_elems_in_color_chosen = 2147000000 ! start with extremely large unrealistic value
do icolor_target = icolormin,icolormax
@@ -878,11 +878,11 @@
! move the element to that new color
! remove element from its current color
- nb_elems_in_this_color(color(ispec)) = nb_elems_in_this_color(color(ispec)) - 1
+ nb_elems_in_this_color(color(ispec)) = nb_elems_in_this_color(color(ispec)) - 1
color(ispec) = icolor_chosen
! and add it to the new color
- nb_elems_in_this_color(icolor_chosen) = nb_elems_in_this_color(icolor_chosen) + 1
-
+ nb_elems_in_this_color(icolor_chosen) = nb_elems_in_this_color(icolor_chosen) + 1
+
endif ! domain elements
enddo
@@ -908,7 +908,7 @@
enddo
! do not balance the last one, because it will be balanced automatically by the others
- do icolor = icolormin,icolormax-1
+ do icolor = icolormin,icolormax-1
! if color is already balanced, do nothing
! (this works because in the initial set the number of elements per color decreases when the color number increases)
@@ -979,7 +979,7 @@
mask_ibool(ibool(NGLLX,NGLLY,NGLLZ,ispec2)) .or. &
mask_ibool(ibool(1,NGLLY,NGLLZ,ispec2))) &
icolor_conflict_found(color(ispec2)) = .true.
-
+
endif ! domain elements
enddo
@@ -990,9 +990,9 @@
enddo
! if cannot find any other color to move this element to
- if (all(icolor_conflict_found(icolormin:icolormax))) cycle
+ if (all(icolor_conflict_found(icolormin:icolormax))) cycle
- ! loops on all the colors to determine the color with the smallest number of elements
+ ! loops on all the colors to determine the color with the smallest number of elements
! and for which there is no conflict
nb_elems_in_color_chosen = 2147000000 ! start with extremely large unrealistic value
do icolor_target = icolormin,icolormax
@@ -1008,8 +1008,8 @@
nb_elems_in_this_color(color(ispec)) = nb_elems_in_this_color(color(ispec)) - 1
color(ispec) = icolor_chosen
! and add it to the new color
- nb_elems_in_this_color(icolor_chosen) = nb_elems_in_this_color(icolor_chosen) + 1
-
+ nb_elems_in_this_color(icolor_chosen) = nb_elems_in_this_color(icolor_chosen) + 1
+
endif ! domain elements
enddo
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/check_mesh_resolution.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/check_mesh_resolution.f90 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/check_mesh_resolution.f90 2012-02-25 01:37:33 UTC (rev 19680)
@@ -61,7 +61,7 @@
integer :: NSPEC_AB_global_min,NSPEC_AB_global_max,NSPEC_AB_global_sum
integer :: NGLOB_AB_global_min,NGLOB_AB_global_max,NGLOB_AB_global_sum
integer :: ispec,sizeprocs
-
+
!********************************************************************************
! empirical choice for distorted elements to estimate time step and period resolved:
@@ -81,7 +81,7 @@
real(kind=CUSTOM_REAL),dimension(:),allocatable :: tmp1,tmp2
integer:: ier
character(len=256) :: filename,prname
-
+
! initializations
if( DT <= 0.0d0) then
DT_PRESENT = .false.
@@ -116,7 +116,7 @@
tmp1(:) = 0.0
tmp2(:) = 0.0
endif
-
+
! checks courant number & minimum resolved period for each grid cell
do ispec=1,NSPEC_AB
@@ -155,8 +155,8 @@
! debug: for vtk output
if( SAVE_MESH_FILES ) tmp1(ispec) = cmax
endif
-
-
+
+
! suggested timestep
dt_suggested = COURANT_SUGGESTED * distance_min / max( vpmax,vsmax )
dt_suggested_glob = min( dt_suggested_glob, dt_suggested)
@@ -334,7 +334,7 @@
! debug: for vtk output
if( SAVE_MESH_FILES ) then
- call create_name_database(prname,myrank,LOCAL_PATH)
+ call create_name_database(prname,myrank,LOCAL_PATH)
! courant number
if( DT_PRESENT ) then
filename = trim(prname)//'res_courant_number'
@@ -348,7 +348,7 @@
xstore,ystore,zstore,ibool, &
tmp2,filename)
deallocate(tmp1,tmp2)
- endif
+ endif
end subroutine check_mesh_resolution
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in 2012-02-25 01:37:33 UTC (rev 19680)
@@ -264,7 +264,7 @@
! try several times with one more color before giving up
logical, parameter :: USE_DROUX_OPTIMIZATION = .false.
integer, parameter :: MAX_NB_TRIES_OF_DROUX_1993 = 15
-!
+!
! postprocess the colors to balance them if Droux (1993) algorithm is not used
logical, parameter :: BALANCE_COLORS_SIMPLE_ALGO = .true.
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/detect_surface.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/detect_surface.f90 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/detect_surface.f90 2012-02-25 01:37:33 UTC (rev 19680)
@@ -707,7 +707,7 @@
integer, dimension(:), allocatable :: valence_external_mesh
integer :: ispec,i,j,k,iglob,ier,count
real(kind=CUSTOM_REAL),parameter :: TOLERANCE_DISTANCE = 0.9
-
+
! detecting surface points/elements (based on valence check on NGLL points) for external mesh
allocate(valence_external_mesh(nglob),stat=ier)
if( ier /= 0 ) stop 'error allocate valence array'
@@ -724,7 +724,7 @@
+ (zstore(ibool(1,1,1,:)) - zstore(ibool(2,1,1,:)))**2 )
mindist = sqrt(mindist)
distance = TOLERANCE_DISTANCE*mindist
-
+
! sets valence value to one corresponding to process rank for points on cross-sections
do ispec = 1, nspec
do k = 1, NGLLZ
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/serial.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/serial.f90 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/serial.f90 2012-02-25 01:37:33 UTC (rev 19680)
@@ -41,13 +41,13 @@
implicit none
real :: ct
-
+
! note: for simplicity, we take cpu_time which returns the elapsed CPU time in seconds
! (instead of wall clock time for parallel MPI function)
call cpu_time(ct)
-
- wtime = ct
+ wtime = ct
+
end function wtime
!
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/Makefile.in
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/Makefile.in 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/Makefile.in 2012-02-25 01:37:33 UTC (rev 19680)
@@ -37,24 +37,32 @@
#CUDA_LIBS= -L/u/dpeter/install/cuda/lib64 -lcudart -lcublas
#MPI_INC= -I/usr/local/openmpi/1.4.3/gcc/x86_64/include
-# MR added CUDA
+# CUDA
+# with configure: ./configure --with-cuda CUDA_LIB=.. CUDA_INC=.. MPI_INC=..
+# default cuda libraries
@COND_CUDA_TRUE at CUDA_LIBS = -lcuda -lcudart -lcublas
@COND_CUDA_FALSE at CUDA_LIBS =
-# with configure: ./configure CUDA_LIB=.. CUDA_INC=.. MPI_INC=..
CUDA_LIB_LOCATION = @CUDA_LIB@
CUDA_LINK = $(CUDA_LIB_LOCATION) $(CUDA_LIBS)
CUDA_INC = @CUDA_INC@ -I../../
MPI_INC = @MPI_INC@
@COND_CUDA_TRUE at NVCC = nvcc
- at COND_CUDA_FALSE@NVCC = g++
+ at COND_CUDA_FALSE@NVCC = @CC@
@COND_CUDA_TRUE at NVCC_FLAGS = $(CUDA_INC) $(MPI_INC) $(COND_MPI_CPPFLAGS) -DCUDA -gencode=arch=compute_20,code=sm_20
@COND_CUDA_FALSE at NVCC_FLAGS = $(MPI_INC) $(COND_MPI_CPPFLAGS)
+# OpenMP
+# with configure: ./configure --with-openmp FLAGS_NO_CHECK="-openmp .." OPENMP_LIB=..
+ at COND_OPENMP_TRUE@OPENMP_LIBS = $(OPENMP_LIB)
+ at COND_OPENMP_FALSE@OPENMP_LIBS =
+ at COND_OPENMP_TRUE@COND_OPENMP_FFLAGS = -DOPENMP_MODE
+ at COND_OPENMP_FALSE@COND_OPENMP_FFLAGS =
+
FC = @FC@
FCFLAGS = #@FCFLAGS@
MPIFC = @MPIFC@
@@ -76,10 +84,10 @@
# Output files directory
OUTPUT=../../in_out_files/OUTPUT_FILES
-FCCOMPILE_CHECK =@FCENV@ ${FC} ${FCFLAGS} $(FLAGS_CHECK) -I$(SHARED)
-FCCOMPILE_NO_CHECK =@FCENV@ ${FC} ${FCFLAGS} $(FLAGS_NO_CHECK) -I$(SHARED)
-MPIFCCOMPILE_CHECK =@FCENV@ ${MPIFC} ${FCFLAGS} $(FLAGS_CHECK) -I$(SHARED)
-MPIFCCOMPILE_NO_CHECK =@FCENV@ ${MPIFC} ${FCFLAGS} $(FLAGS_NO_CHECK) -I$(SHARED)
+FCCOMPILE_CHECK =@FCENV@ ${FC} ${FCFLAGS} $(FLAGS_CHECK) $(COND_OPENMP_FFLAGS) -I$(SHARED)
+FCCOMPILE_NO_CHECK =@FCENV@ ${FC} ${FCFLAGS} $(FLAGS_NO_CHECK) $(COND_OPENMP_FFLAGS) -I$(SHARED)
+MPIFCCOMPILE_CHECK =@FCENV@ ${MPIFC} ${FCFLAGS} $(FLAGS_CHECK) $(COND_OPENMP_FFLAGS) -I$(SHARED)
+MPIFCCOMPILE_NO_CHECK =@FCENV@ ${MPIFC} ${FCFLAGS} $(FLAGS_NO_CHECK) $(COND_OPENMP_FFLAGS) -I$(SHARED)
@COND_MPI_TRUE at FCLINK = $(MPIFCCOMPILE_NO_CHECK)
@COND_MPI_FALSE at FCLINK = $(FCCOMPILE_NO_CHECK)
@@ -164,7 +172,6 @@
$O/compute_forces_elastic.o \
$O/compute_forces_elastic_Dev.o \
$O/compute_forces_elastic_noDev.o \
- $O/compute_forces_elastic_Dev_openmp.o \
$O/compute_add_sources_acoustic.o \
$O/compute_add_sources_elastic.o \
$O/compute_coupling_acoustic_el.o \
@@ -198,6 +205,10 @@
@COND_MPI_TRUE at COND_MPI_OBJECTS = $O/parallel.o
@COND_MPI_FALSE at COND_MPI_OBJECTS = $O/serial.o
+# objects toggled between openmp and non-openmp version
+ at COND_OPENMP_TRUE@COND_OPENMP_OBJECTS = $O/compute_forces_elastic_Dev_openmp.openmp.o
+ at COND_OPENMP_FALSE@COND_OPENMP_OBJECTS =
+
LIBSPECFEM = $L/libspecfem.a
# objects for the pure Fortran version
@@ -231,8 +242,8 @@
# rules for the pure Fortran version
@COND_PYRE_FALSE@# solver also depends on values from mesher
- at COND_PYRE_FALSE@xspecfem3D: $(XSPECFEM_OBJECTS) $(COND_MPI_OBJECTS)
- at COND_PYRE_FALSE@ ${FCLINK} -o ${E}/xspecfem3D $(XSPECFEM_OBJECTS) $(COND_MPI_OBJECTS) $(MPILIBS) $(CUDA_LINK)
+ at COND_PYRE_FALSE@xspecfem3D: $(XSPECFEM_OBJECTS) $(COND_MPI_OBJECTS) $(COND_OPENMP_OBJECTS)
+ at COND_PYRE_FALSE@ ${FCLINK} -o ${E}/xspecfem3D $(XSPECFEM_OBJECTS) $(COND_MPI_OBJECTS) $(MPILIBS) $(COND_OPENMP_OBJECTS) $(OPENMP_LIBS) $(CUDA_LINK)
@COND_PYRE_FALSE@
convolve_source_timefunction: xconvolve_source_timefunction
@@ -283,10 +294,22 @@
$O/%.o: %.f90 $(SHARED)constants.h
${FCCOMPILE_NO_CHECK} -c -o $@ $<
+$O/%.o: %.F90 $(SHARED)constants.h
+ ${FCCOMPILE_NO_CHECK} -c -o $@ $<
+
$O/%.shared.o: ${SHARED}%.f90 $(SHARED)constants.h
${FCCOMPILE_NO_CHECK} -c -o $@ $<
+$O/%.shared.o: ${SHARED}%.F90 $(SHARED)constants.h
+ ${FCCOMPILE_NO_CHECK} -c -o $@ $<
+
###
+### OpenMP compilation
+###
+$O/%.openmp.o: %.f90 $(SHARED)constants.h
+ ${FCCOMPILE_NO_CHECK} -c -o $@ $<
+
+###
### CUDA compilation
###
$O/%.cuda.o: ${CUDAD}%.cu ../../config.h $(CUDAD)mesh_constants_cuda.h $(CUDAD)prepare_constants_cuda.h
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_elastic.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_elastic.f90 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_elastic.f90 2012-02-25 01:37:33 UTC (rev 19680)
@@ -259,64 +259,64 @@
if( ier /= 0 ) stop 'error allocating array adj_sourcearray'
if (.not. SU_FORMAT) then
- !!! read ascii adjoint sources
- irec_local = 0
- do irec = 1, nrec
- ! compute source arrays
- if (myrank == islice_selected_rec(irec)) then
- irec_local = irec_local + 1
- ! reads in **sta**.**net**.**LH**.adj files
- adj_source_file = trim(station_name(irec))//'.'//trim(network_name(irec))
- call compute_arrays_adjoint_source(myrank,adj_source_file, &
- xi_receiver(irec),eta_receiver(irec),gamma_receiver(irec), &
- adj_sourcearray, xigll,yigll,zigll, &
- it_sub_adj,NSTEP,NTSTEP_BETWEEN_READ_ADJSRC)
- do itime = 1,NTSTEP_BETWEEN_READ_ADJSRC
- adj_sourcearrays(irec_local,itime,:,:,:,:) = adj_sourcearray(itime,:,:,:,:)
- enddo
+ !!! read ascii adjoint sources
+ irec_local = 0
+ do irec = 1, nrec
+ ! compute source arrays
+ if (myrank == islice_selected_rec(irec)) then
+ irec_local = irec_local + 1
+ ! reads in **sta**.**net**.**LH**.adj files
+ adj_source_file = trim(station_name(irec))//'.'//trim(network_name(irec))
+ call compute_arrays_adjoint_source(myrank,adj_source_file, &
+ xi_receiver(irec),eta_receiver(irec),gamma_receiver(irec), &
+ adj_sourcearray, xigll,yigll,zigll, &
+ it_sub_adj,NSTEP,NTSTEP_BETWEEN_READ_ADJSRC)
+ do itime = 1,NTSTEP_BETWEEN_READ_ADJSRC
+ adj_sourcearrays(irec_local,itime,:,:,:,:) = adj_sourcearray(itime,:,:,:,:)
+ enddo
- endif
- enddo
+ endif
+ enddo
else
- !!! read SU adjoint sources
- ! range of the block we need to read
- it_start = NSTEP - it_sub_adj*NTSTEP_BETWEEN_READ_ADJSRC + 1
- it_end = it_start + NTSTEP_BETWEEN_READ_ADJSRC - 1
- write(procname,"(i4)") myrank
- ! read adjoint sources
- open(unit=IIN_SU1, file=trim(adjustl(OUTPUT_FILES_PATH))//'../SEM/'//trim(adjustl(procname))//'_dx_SU.adj', &
- access='direct',recl=240+4*NSTEP)
- open(unit=IIN_SU2, file=trim(adjustl(OUTPUT_FILES_PATH))//'../SEM/'//trim(adjustl(procname))//'_dy_SU.adj', &
- access='direct',recl=240+4*NSTEP)
- open(unit=IIN_SU3, file=trim(adjustl(OUTPUT_FILES_PATH))//'../SEM/'//trim(adjustl(procname))//'_dz_SU.adj', &
- access='direct',recl=240+4*NSTEP)
- do irec_local = 1,nrec_local
- irec = number_receiver_global(irec_local)
- read(IIN_SU1,rec=irec_local) r4head, adj_temp
- adj_src(:,1)=adj_temp(it_start:it_end)
- read(IIN_SU2,rec=irec_local) r4head, adj_temp
- adj_src(:,2)=adj_temp(it_start:it_end)
- read(IIN_SU3,rec=irec_local) r4head, adj_temp
- adj_src(:,3)=adj_temp(it_start:it_end)
- ! lagrange interpolators for receiver location
- call lagrange_any(xi_receiver(irec),NGLLX,xigll,hxir,hpxir)
- call lagrange_any(eta_receiver(irec),NGLLY,yigll,hetar,hpetar)
- call lagrange_any(gamma_receiver(irec),NGLLZ,zigll,hgammar,hpgammar)
- ! interpolates adjoint source onto GLL points within this element
- do k = 1, NGLLZ
- do j = 1, NGLLY
- do i = 1, NGLLX
- adj_sourcearray(:,:,i,j,k) = hxir(i) * hetar(j) * hgammar(k) * adj_src(:,:)
- enddo
- enddo
- enddo
- do itime = 1,NTSTEP_BETWEEN_READ_ADJSRC
- adj_sourcearrays(irec_local,itime,:,:,:,:) = adj_sourcearray(itime,:,:,:,:)
- enddo
- enddo
- close(IIN_SU1)
- close(IIN_SU2)
- close(IIN_SU3)
+ !!! read SU adjoint sources
+ ! range of the block we need to read
+ it_start = NSTEP - it_sub_adj*NTSTEP_BETWEEN_READ_ADJSRC + 1
+ it_end = it_start + NTSTEP_BETWEEN_READ_ADJSRC - 1
+ write(procname,"(i4)") myrank
+ ! read adjoint sources
+ open(unit=IIN_SU1, file=trim(adjustl(OUTPUT_FILES_PATH))//'../SEM/'//trim(adjustl(procname))//'_dx_SU.adj', &
+ access='direct',recl=240+4*NSTEP)
+ open(unit=IIN_SU2, file=trim(adjustl(OUTPUT_FILES_PATH))//'../SEM/'//trim(adjustl(procname))//'_dy_SU.adj', &
+ access='direct',recl=240+4*NSTEP)
+ open(unit=IIN_SU3, file=trim(adjustl(OUTPUT_FILES_PATH))//'../SEM/'//trim(adjustl(procname))//'_dz_SU.adj', &
+ access='direct',recl=240+4*NSTEP)
+ do irec_local = 1,nrec_local
+ irec = number_receiver_global(irec_local)
+ read(IIN_SU1,rec=irec_local) r4head, adj_temp
+ adj_src(:,1)=adj_temp(it_start:it_end)
+ read(IIN_SU2,rec=irec_local) r4head, adj_temp
+ adj_src(:,2)=adj_temp(it_start:it_end)
+ read(IIN_SU3,rec=irec_local) r4head, adj_temp
+ adj_src(:,3)=adj_temp(it_start:it_end)
+ ! lagrange interpolators for receiver location
+ call lagrange_any(xi_receiver(irec),NGLLX,xigll,hxir,hpxir)
+ call lagrange_any(eta_receiver(irec),NGLLY,yigll,hetar,hpetar)
+ call lagrange_any(gamma_receiver(irec),NGLLZ,zigll,hgammar,hpgammar)
+ ! interpolates adjoint source onto GLL points within this element
+ do k = 1, NGLLZ
+ do j = 1, NGLLY
+ do i = 1, NGLLX
+ adj_sourcearray(:,:,i,j,k) = hxir(i) * hetar(j) * hgammar(k) * adj_src(:,:)
+ enddo
+ enddo
+ enddo
+ do itime = 1,NTSTEP_BETWEEN_READ_ADJSRC
+ adj_sourcearrays(irec_local,itime,:,:,:,:) = adj_sourcearray(itime,:,:,:,:)
+ enddo
+ enddo
+ close(IIN_SU1)
+ close(IIN_SU2)
+ close(IIN_SU3)
endif !if(.not. SU_FORMAT)
deallocate(adj_sourcearray)
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_acoustic.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_acoustic.f90 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_acoustic.f90 2012-02-25 01:37:33 UTC (rev 19680)
@@ -153,7 +153,7 @@
endif
- if(PML) then
+ if(PML) then
! transfers potentials to CPU
if(GPU_MODE) call transfer_fields_ac_from_device(NGLOB_AB,potential_acoustic, &
potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
@@ -186,9 +186,10 @@
! absorbing boundaries
if(ABSORBING_CONDITIONS) then
- if( PML .and. PML_USE_SOMMERFELD ) then
- ! adds a Sommerfeld condition on the domain's absorbing boundaries
- call PML_acoustic_abs_boundaries(phase_is_inner,NSPEC_AB,NGLOB_AB,&
+ if( PML ) then
+ if( PML_USE_SOMMERFELD ) then
+ ! adds a Sommerfeld condition on the domain's absorbing boundaries
+ call PML_acoustic_abs_boundaries(phase_is_inner,NSPEC_AB,NGLOB_AB,&
abs_boundary_jacobian2Dw,abs_boundary_ijk,abs_boundary_ispec, &
num_abs_boundary_faces, &
kappastore,ibool,ispec_is_inner, &
@@ -197,10 +198,10 @@
num_PML_ispec,PML_ispec,ispec_is_PML_inum,&
chi1_dot,chi2_t,chi2_t_dot,chi3_dot,chi4_dot,&
chi1_dot_dot,chi3_dot_dot,chi4_dot_dot)
- ! transfers potentials back to GPU
- if(GPU_MODE) call transfer_fields_ac_to_device(NGLOB_AB,potential_acoustic, &
+ ! transfers potentials back to GPU
+ if(GPU_MODE) call transfer_fields_ac_to_device(NGLOB_AB,potential_acoustic, &
potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
-
+ endif
else
! Stacey boundary conditions
call compute_stacey_acoustic(NSPEC_AB,NGLOB_AB, &
Copied: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90 (from rev 19670, seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.f90)
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90 (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90 2012-02-25 01:37:33 UTC (rev 19680)
@@ -0,0 +1,855 @@
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+! elastic solver
+
+subroutine compute_forces_elastic()
+
+ use specfem_par
+ use specfem_par_acoustic
+ use specfem_par_elastic
+ use specfem_par_poroelastic
+
+ implicit none
+
+ integer:: iphase
+ logical:: phase_is_inner
+
+! distinguishes two runs: for points on MPI interfaces, and points within the partitions
+ do iphase=1,2
+
+ !first for points on MPI interfaces
+ if( iphase == 1 ) then
+ phase_is_inner = .false.
+ else
+ phase_is_inner = .true.
+ endif
+
+
+! elastic term
+ if( .NOT. GPU_MODE ) then
+ if(USE_DEVILLE_PRODUCTS) then
+ ! uses Deville (2002) optimizations
+ call compute_forces_elastic_Dev_sim1(iphase)
+
+ ! adjoint simulations: backward/reconstructed wavefield
+ if( SIMULATION_TYPE == 3 ) &
+ call compute_forces_elastic_Dev_sim3(iphase)
+
+ else
+ ! no optimizations used
+ call compute_forces_elastic_noDev( iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_yy,hprime_zz, &
+ hprimewgll_xx,hprimewgll_yy,hprimewgll_zz,&
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool, &
+ ATTENUATION,&
+ one_minus_sum_beta,factor_common, &
+ alphaval,betaval,gammaval,&
+ NSPEC_ATTENUATION_AB, &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ epsilondev_xx,epsilondev_yy,epsilondev_xy,&
+ epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store, &
+ SIMULATION_TYPE,COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+ NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+ is_moho_top,is_moho_bot, &
+ dsdx_top,dsdx_bot, &
+ ispec2D_moho_top,ispec2D_moho_bot, &
+ num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+ phase_ispec_inner_elastic )
+
+ ! adjoint simulations: backward/reconstructed wavefield
+ if( SIMULATION_TYPE == 3 ) &
+ call compute_forces_elastic_noDev( iphase, NSPEC_AB,NGLOB_AB,&
+ b_displ,b_accel, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_yy,hprime_zz, &
+ hprimewgll_xx,hprimewgll_yy,hprimewgll_zz,&
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool, &
+ ATTENUATION,&
+ one_minus_sum_beta,factor_common, &
+ b_alphaval,b_betaval,b_gammaval,&
+ NSPEC_ATTENUATION_AB, &
+ b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+ b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy,&
+ b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store, &
+ SIMULATION_TYPE,COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+ NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+ is_moho_top,is_moho_bot, &
+ b_dsdx_top,b_dsdx_bot, &
+ ispec2D_moho_top,ispec2D_moho_bot, &
+ num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+ phase_ispec_inner_elastic )
+
+ endif
+
+ else
+ ! on GPU
+ ! contains both forward SIM_TYPE==1 and backward SIM_TYPE==3 simulations
+ call compute_forces_elastic_cuda(Mesh_pointer, iphase, &
+ nspec_outer_elastic, &
+ nspec_inner_elastic, &
+ SIMULATION_TYPE, &
+ COMPUTE_AND_STORE_STRAIN,ATTENUATION,ANISOTROPY)
+ endif ! GPU_MODE
+
+
+! adds elastic absorbing boundary term to acceleration (Stacey conditions)
+ if(ABSORBING_CONDITIONS) &
+ call compute_stacey_elastic(NSPEC_AB,NGLOB_AB,accel, &
+ ibool,ispec_is_inner,phase_is_inner, &
+ abs_boundary_normal,abs_boundary_jacobian2Dw, &
+ abs_boundary_ijk,abs_boundary_ispec, &
+ num_abs_boundary_faces, &
+ veloc,rho_vp,rho_vs, &
+ ispec_is_elastic,SIMULATION_TYPE,SAVE_FORWARD, &
+ NSTEP,it,NGLOB_ADJOINT,b_accel, &
+ b_num_abs_boundary_faces,b_reclen_field,b_absorb_field,&
+ GPU_MODE,Mesh_pointer)
+
+! acoustic coupling
+ if( ACOUSTIC_SIMULATION ) then
+ if( .NOT. GPU_MODE ) then
+ call compute_coupling_elastic_ac(NSPEC_AB,NGLOB_AB, &
+ ibool,accel,potential_dot_dot_acoustic, &
+ num_coupling_ac_el_faces, &
+ coupling_ac_el_ispec,coupling_ac_el_ijk, &
+ coupling_ac_el_normal, &
+ coupling_ac_el_jacobian2Dw, &
+ ispec_is_inner,phase_is_inner)
+
+ ! adjoint simulations
+ if( SIMULATION_TYPE == 3 ) &
+ call compute_coupling_elastic_ac(NSPEC_ADJOINT,NGLOB_ADJOINT, &
+ ibool,b_accel,b_potential_dot_dot_acoustic, &
+ num_coupling_ac_el_faces, &
+ coupling_ac_el_ispec,coupling_ac_el_ijk, &
+ coupling_ac_el_normal, &
+ coupling_ac_el_jacobian2Dw, &
+ ispec_is_inner,phase_is_inner)
+ else
+ ! on GPU
+ if( num_coupling_ac_el_faces > 0 ) &
+ call compute_coupling_el_ac_cuda(Mesh_pointer,phase_is_inner, &
+ num_coupling_ac_el_faces,SIMULATION_TYPE)
+
+ endif
+ endif
+
+
+! poroelastic coupling
+! not implemented yet
+! if( POROELASTIC_SIMULATION ) &
+! call compute_coupling_elastic_poro()
+
+! adds source term (single-force/moment-tensor solution)
+ call compute_add_sources_elastic( NSPEC_AB,NGLOB_AB,accel, &
+ ibool,ispec_is_inner,phase_is_inner, &
+ NSOURCES,myrank,it,islice_selected_source,ispec_selected_source,&
+ xi_source,eta_source,gamma_source,nu_source, &
+ hdur,hdur_gaussian,tshift_cmt,dt,t0,sourcearrays, &
+ ispec_is_elastic,SIMULATION_TYPE,NSTEP,NGLOB_ADJOINT, &
+ nrec,islice_selected_rec,ispec_selected_rec, &
+ nadj_rec_local,adj_sourcearrays,b_accel, &
+ NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY, &
+ GPU_MODE, Mesh_pointer )
+
+ ! assemble all the contributions between slices using MPI
+ if( phase_is_inner .eqv. .false. ) then
+ ! sends accel values to corresponding MPI interface neighbors
+ if(.NOT. GPU_MODE) then
+ call assemble_MPI_vector_ext_mesh_s(NPROC,NGLOB_AB,accel, &
+ buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
+ my_neighbours_ext_mesh, &
+ request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
+ else ! GPU_MODE==1
+ call transfer_boun_accel_from_device(NGLOB_AB*NDIM, Mesh_pointer, accel,&
+ buffer_send_vector_ext_mesh,&
+ num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh,&
+ nibool_interfaces_ext_mesh, ibool_interfaces_ext_mesh,1) ! <-- 1 == fwd accel
+ call assemble_MPI_vector_send_cuda(NPROC, &
+ buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,&
+ my_neighbours_ext_mesh, &
+ request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
+ endif ! GPU_MODE
+
+ ! adjoint simulations
+ if( SIMULATION_TYPE == 3 ) then
+ if(.NOT. GPU_MODE) then
+ call assemble_MPI_vector_ext_mesh_s(NPROC,NGLOB_ADJOINT,b_accel, &
+ b_buffer_send_vector_ext_mesh,b_buffer_recv_vector_ext_mesh, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
+ my_neighbours_ext_mesh, &
+ b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh)
+ else ! GPU_MODE == 1
+ call transfer_boun_accel_from_device(NGLOB_AB*NDIM, Mesh_pointer, b_accel,&
+ b_buffer_send_vector_ext_mesh,&
+ num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh,&
+ nibool_interfaces_ext_mesh, ibool_interfaces_ext_mesh,3) ! <-- 3 == adjoint b_accel
+ call assemble_MPI_vector_send_cuda(NPROC, &
+ b_buffer_send_vector_ext_mesh,b_buffer_recv_vector_ext_mesh, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,&
+ my_neighbours_ext_mesh, &
+ b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh)
+
+ endif ! GPU
+ endif !adjoint
+
+ else
+ ! waits for send/receive requests to be completed and assembles values
+ if(.NOT. GPU_MODE) then
+ call assemble_MPI_vector_ext_mesh_w(NPROC,NGLOB_AB,accel, &
+ buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
+ max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
+ else ! GPU_MODE == 1
+ call assemble_MPI_vector_write_cuda(NPROC,NGLOB_AB,accel, Mesh_pointer,&
+ buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
+ max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ request_send_vector_ext_mesh,request_recv_vector_ext_mesh,1)
+ endif
+ ! adjoint simulations
+ if( SIMULATION_TYPE == 3 ) then
+ if(.NOT. GPU_MODE) then
+ call assemble_MPI_vector_ext_mesh_w(NPROC,NGLOB_ADJOINT,b_accel, &
+ b_buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
+ max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh)
+ else ! GPU_MODE == 1
+ call assemble_MPI_vector_write_cuda(NPROC,NGLOB_AB,b_accel, Mesh_pointer,&
+ b_buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
+ max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh,3)
+ endif
+ endif !adjoint
+
+ endif
+
+ !! DK DK May 2009: removed this because now each slice of a CUBIT + SCOTCH mesh
+ !! DK DK May 2009: has a different number of spectral elements and therefore
+ !! DK DK May 2009: only the general non-blocking MPI routines assemble_MPI_vector_ext_mesh_s
+ !! DK DK May 2009: and assemble_MPI_vector_ext_mesh_w above can be used.
+ !! DK DK May 2009: For adjoint runs below (SIMULATION_TYPE == 3) they should be used as well.
+
+ enddo
+
+ ! multiplies with inverse of mass matrix (note: rmass has been inverted already)
+ if(.NOT. GPU_MODE) then
+ accel(1,:) = accel(1,:)*rmass(:)
+ accel(2,:) = accel(2,:)*rmass(:)
+ accel(3,:) = accel(3,:)*rmass(:)
+ ! adjoint simulations
+ if (SIMULATION_TYPE == 3) then
+ b_accel(1,:) = b_accel(1,:)*rmass(:)
+ b_accel(2,:) = b_accel(2,:)*rmass(:)
+ b_accel(3,:) = b_accel(3,:)*rmass(:)
+ endif !adjoint
+ else ! GPU_MODE == 1
+ call kernel_3_a_cuda(Mesh_pointer, NGLOB_AB, deltatover2,SIMULATION_TYPE,b_deltatover2,OCEANS)
+ endif
+
+! updates acceleration with ocean load term
+ if(OCEANS) then
+ if( .NOT. GPU_MODE ) then
+ call elastic_ocean_load(NSPEC_AB,NGLOB_AB, &
+ ibool,rmass,rmass_ocean_load,accel, &
+ free_surface_normal,free_surface_ijk,free_surface_ispec, &
+ num_free_surface_faces,SIMULATION_TYPE, &
+ NGLOB_ADJOINT,b_accel)
+ else
+ ! on GPU
+ call elastic_ocean_load_cuda(Mesh_pointer,SIMULATION_TYPE)
+ endif
+ endif
+
+! updates velocities
+! Newmark finite-difference time scheme with elastic domains:
+! (see e.g. Hughes, 1987; Chaljub et al., 2003)
+!
+! u(t+delta_t) = u(t) + delta_t v(t) + 1/2 delta_t**2 a(t)
+! v(t+delta_t) = v(t) + 1/2 delta_t a(t) + 1/2 delta_t a(t+delta_t)
+! a(t+delta_t) = 1/M_elastic ( -K_elastic u(t+delta) + B_elastic chi_dot_dot(t+delta_t) + f( t+delta_t) )
+!
+! where
+! u, v, a are displacement,velocity & acceleration
+! M is mass matrix, K stiffness matrix and B boundary term for acoustic/elastic domains
+! f denotes a source term (acoustic/elastic)
+! chi_dot_dot is acoustic (fluid) potential ( dotted twice with respect to time)
+!
+! corrector:
+! updates the velocity term which requires a(t+delta)
+! GPU_MODE: this is handled in 'kernel_3' at the same time as accel*rmass
+ if(.NOT. GPU_MODE) then
+ veloc(:,:) = veloc(:,:) + deltatover2*accel(:,:)
+ ! adjoint simulations
+ if (SIMULATION_TYPE == 3) b_veloc(:,:) = b_veloc(:,:) + b_deltatover2*b_accel(:,:)
+ else ! GPU_MODE == 1
+ if( OCEANS ) call kernel_3_b_cuda(Mesh_pointer, NGLOB_AB, deltatover2,SIMULATION_TYPE,b_deltatover2)
+ endif
+
+
+end subroutine compute_forces_elastic
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+subroutine elastic_ocean_load(NSPEC_AB,NGLOB_AB, &
+ ibool,rmass,rmass_ocean_load,accel, &
+ free_surface_normal,free_surface_ijk,free_surface_ispec, &
+ num_free_surface_faces,SIMULATION_TYPE, &
+ NGLOB_ADJOINT,b_accel)
+
+! updates acceleration with ocean load term:
+! approximates ocean-bottom continuity of pressure & displacement for longer period waves (> ~20s ),
+! assuming incompressible fluid column above bathymetry ocean bottom
+
+ implicit none
+
+ include 'constants.h'
+
+ integer :: NSPEC_AB,NGLOB_AB
+
+ real(kind=CUSTOM_REAL),dimension(NDIM,NGLOB_AB),intent(inout) :: accel
+ real(kind=CUSTOM_REAL),dimension(NGLOB_AB),intent(in) :: rmass,rmass_ocean_load
+
+ integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB),intent(in) :: ibool
+
+ ! free surface
+ integer :: num_free_surface_faces
+ real(kind=CUSTOM_REAL) :: free_surface_normal(NDIM,NGLLSQUARE,num_free_surface_faces)
+ integer :: free_surface_ijk(3,NGLLSQUARE,num_free_surface_faces)
+ integer :: free_surface_ispec(num_free_surface_faces)
+
+ ! adjoint simulations
+ integer :: SIMULATION_TYPE,NGLOB_ADJOINT
+ real(kind=CUSTOM_REAL),dimension(NDIM,NGLOB_ADJOINT):: b_accel
+
+! local parameters
+ real(kind=CUSTOM_REAL) :: nx,ny,nz
+ real(kind=CUSTOM_REAL) :: additional_term,force_normal_comp
+ integer :: i,j,k,ispec,iglob
+ integer :: igll,iface
+ logical,dimension(NGLOB_AB) :: updated_dof_ocean_load
+ ! adjoint locals
+ real(kind=CUSTOM_REAL) :: b_additional_term,b_force_normal_comp
+
+ ! initialize the updates
+ updated_dof_ocean_load(:) = .false.
+
+ ! for surface elements exactly at the top of the model (ocean bottom)
+ do iface = 1,num_free_surface_faces
+
+ ispec = free_surface_ispec(iface)
+ do igll = 1, NGLLSQUARE
+ i = free_surface_ijk(1,igll,iface)
+ j = free_surface_ijk(2,igll,iface)
+ k = free_surface_ijk(3,igll,iface)
+
+ ! get global point number
+ iglob = ibool(i,j,k,ispec)
+
+ ! only update once
+ if(.not. updated_dof_ocean_load(iglob)) then
+
+ ! get normal
+ nx = free_surface_normal(1,igll,iface)
+ ny = free_surface_normal(2,igll,iface)
+ nz = free_surface_normal(3,igll,iface)
+
+ ! make updated component of right-hand side
+ ! we divide by rmass() which is 1 / M
+ ! we use the total force which includes the Coriolis term above
+ force_normal_comp = ( accel(1,iglob)*nx + &
+ accel(2,iglob)*ny + &
+ accel(3,iglob)*nz ) / rmass(iglob)
+
+ additional_term = (rmass_ocean_load(iglob) - rmass(iglob)) * force_normal_comp
+
+ accel(1,iglob) = accel(1,iglob) + additional_term * nx
+ accel(2,iglob) = accel(2,iglob) + additional_term * ny
+ accel(3,iglob) = accel(3,iglob) + additional_term * nz
+
+ ! adjoint simulations
+ if (SIMULATION_TYPE == 3) then
+ b_force_normal_comp = ( b_accel(1,iglob)*nx + &
+ b_accel(2,iglob)*ny + &
+ b_accel(3,iglob)*nz) / rmass(iglob)
+ b_additional_term = (rmass_ocean_load(iglob) - rmass(iglob)) * b_force_normal_comp
+
+ b_accel(1,iglob) = b_accel(1,iglob) + b_additional_term * nx
+ b_accel(2,iglob) = b_accel(2,iglob) + b_additional_term * ny
+ b_accel(3,iglob) = b_accel(3,iglob) + b_additional_term * nz
+ endif !adjoint
+
+ ! done with this point
+ updated_dof_ocean_load(iglob) = .true.
+
+ endif
+
+ enddo ! igll
+ enddo ! iface
+
+end subroutine elastic_ocean_load
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+! distributes routines according to chosen NGLLX in constants.h
+
+!daniel: note -- i put it here rather than in compute_forces_elastic_Dev.f90 because compiler complains that:
+! " The storage extent of the dummy argument exceeds that of the actual argument. "
+
+subroutine compute_forces_elastic_Dev_sim1(iphase)
+
+! forward simulations
+
+ use specfem_par
+ use specfem_par_acoustic
+ use specfem_par_elastic
+ use specfem_par_poroelastic
+
+ implicit none
+
+ integer,intent(in) :: iphase
+
+ select case(NGLLX)
+
+ case (5)
+
+!----------------------------------------------------------------------------------------------
+
+! OpenMP routine flag for testing & benchmarking forward runs only
+! configure additional flag, e.g.: FLAGS_NO_CHECK="-O3 -DOPENMP_MODE -openmp"
+
+!----------------------------------------------------------------------------------------------
+#ifdef OPENMP_MODE
+ call compute_forces_elastic_Dev_openmp(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool, &
+ ATTENUATION, &
+ one_minus_sum_beta,factor_common, &
+ alphaval,betaval,gammaval, &
+ NSPEC_ATTENUATION_AB, &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+ epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store, &
+ SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+ NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+ is_moho_top,is_moho_bot, &
+ dsdx_top,dsdx_bot, &
+ ispec2D_moho_top,ispec2D_moho_bot, &
+ num_phase_ispec_elastic,&
+ phase_ispec_inner_elastic,&
+ num_colors_outer_elastic,num_colors_inner_elastic)
+#else
+ call compute_forces_elastic_Dev_5p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool, &
+ ATTENUATION, &
+ one_minus_sum_beta,factor_common, &
+ alphaval,betaval,gammaval, &
+ NSPEC_ATTENUATION_AB, &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+ epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store, &
+ SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+ NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+ is_moho_top,is_moho_bot, &
+ dsdx_top,dsdx_bot, &
+ ispec2D_moho_top,ispec2D_moho_bot, &
+ num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+ phase_ispec_inner_elastic )
+#endif
+
+ case (6)
+ call compute_forces_elastic_Dev_6p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool, &
+ ATTENUATION, &
+ one_minus_sum_beta,factor_common, &
+ alphaval,betaval,gammaval, &
+ NSPEC_ATTENUATION_AB, &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+ epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store, &
+ SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+ NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+ is_moho_top,is_moho_bot, &
+ dsdx_top,dsdx_bot, &
+ ispec2D_moho_top,ispec2D_moho_bot, &
+ num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+ phase_ispec_inner_elastic )
+
+ case (7)
+ call compute_forces_elastic_Dev_7p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool, &
+ ATTENUATION, &
+ one_minus_sum_beta,factor_common, &
+ alphaval,betaval,gammaval, &
+ NSPEC_ATTENUATION_AB, &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+ epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store, &
+ SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+ NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+ is_moho_top,is_moho_bot, &
+ dsdx_top,dsdx_bot, &
+ ispec2D_moho_top,ispec2D_moho_bot, &
+ num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+ phase_ispec_inner_elastic )
+
+ case (8)
+ call compute_forces_elastic_Dev_8p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool, &
+ ATTENUATION, &
+ one_minus_sum_beta,factor_common, &
+ alphaval,betaval,gammaval, &
+ NSPEC_ATTENUATION_AB, &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+ epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store, &
+ SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+ NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+ is_moho_top,is_moho_bot, &
+ dsdx_top,dsdx_bot, &
+ ispec2D_moho_top,ispec2D_moho_bot, &
+ num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+ phase_ispec_inner_elastic )
+
+ case (9)
+ call compute_forces_elastic_Dev_9p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool, &
+ ATTENUATION, &
+ one_minus_sum_beta,factor_common, &
+ alphaval,betaval,gammaval, &
+ NSPEC_ATTENUATION_AB, &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+ epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store, &
+ SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+ NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+ is_moho_top,is_moho_bot, &
+ dsdx_top,dsdx_bot, &
+ ispec2D_moho_top,ispec2D_moho_bot, &
+ num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+ phase_ispec_inner_elastic )
+
+ case (10)
+ call compute_forces_elastic_Dev_10p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool, &
+ ATTENUATION, &
+ one_minus_sum_beta,factor_common, &
+ alphaval,betaval,gammaval, &
+ NSPEC_ATTENUATION_AB, &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+ epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store, &
+ SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+ NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+ is_moho_top,is_moho_bot, &
+ dsdx_top,dsdx_bot, &
+ ispec2D_moho_top,ispec2D_moho_bot, &
+ num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+ phase_ispec_inner_elastic )
+
+ case default
+
+ stop 'error no Deville routine available for chosen NGLLX'
+
+ end select
+
+end subroutine compute_forces_elastic_Dev_sim1
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+subroutine compute_forces_elastic_Dev_sim3(iphase)
+
+! uses backward/reconstructed displacement and acceleration arrays
+
+ use specfem_par
+ use specfem_par_acoustic
+ use specfem_par_elastic
+ use specfem_par_poroelastic
+
+ implicit none
+
+ integer,intent(in) :: iphase
+
+ select case(NGLLX)
+
+ case (5)
+ call compute_forces_elastic_Dev_5p(iphase, NSPEC_AB,NGLOB_AB, &
+ b_displ,b_accel, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool, &
+ ATTENUATION, &
+ one_minus_sum_beta,factor_common, &
+ b_alphaval,b_betaval,b_gammaval, &
+ NSPEC_ATTENUATION_AB, &
+ b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+ b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+ b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store, &
+ SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
+ NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+ is_moho_top,is_moho_bot, &
+ b_dsdx_top,b_dsdx_bot, &
+ ispec2D_moho_top,ispec2D_moho_bot, &
+ num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+ phase_ispec_inner_elastic )
+
+ case (6)
+ call compute_forces_elastic_Dev_6p(iphase, NSPEC_AB,NGLOB_AB, &
+ b_displ,b_accel, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool, &
+ ATTENUATION, &
+ one_minus_sum_beta,factor_common, &
+ b_alphaval,b_betaval,b_gammaval, &
+ NSPEC_ATTENUATION_AB, &
+ b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+ b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+ b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store, &
+ SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
+ NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+ is_moho_top,is_moho_bot, &
+ b_dsdx_top,b_dsdx_bot, &
+ ispec2D_moho_top,ispec2D_moho_bot, &
+ num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+ phase_ispec_inner_elastic )
+
+ case (7)
+ call compute_forces_elastic_Dev_7p(iphase, NSPEC_AB,NGLOB_AB, &
+ b_displ,b_accel, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool, &
+ ATTENUATION, &
+ one_minus_sum_beta,factor_common, &
+ b_alphaval,b_betaval,b_gammaval, &
+ NSPEC_ATTENUATION_AB, &
+ b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+ b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+ b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store, &
+ SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
+ NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+ is_moho_top,is_moho_bot, &
+ b_dsdx_top,b_dsdx_bot, &
+ ispec2D_moho_top,ispec2D_moho_bot, &
+ num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+ phase_ispec_inner_elastic )
+
+ case (8)
+ call compute_forces_elastic_Dev_8p(iphase, NSPEC_AB,NGLOB_AB, &
+ b_displ,b_accel, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool, &
+ ATTENUATION, &
+ one_minus_sum_beta,factor_common, &
+ b_alphaval,b_betaval,b_gammaval, &
+ NSPEC_ATTENUATION_AB, &
+ b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+ b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+ b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store, &
+ SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
+ NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+ is_moho_top,is_moho_bot, &
+ b_dsdx_top,b_dsdx_bot, &
+ ispec2D_moho_top,ispec2D_moho_bot, &
+ num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+ phase_ispec_inner_elastic )
+
+ case (9)
+ call compute_forces_elastic_Dev_9p(iphase, NSPEC_AB,NGLOB_AB, &
+ b_displ,b_accel, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool, &
+ ATTENUATION, &
+ one_minus_sum_beta,factor_common, &
+ b_alphaval,b_betaval,b_gammaval, &
+ NSPEC_ATTENUATION_AB, &
+ b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+ b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+ b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store, &
+ SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
+ NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+ is_moho_top,is_moho_bot, &
+ b_dsdx_top,b_dsdx_bot, &
+ ispec2D_moho_top,ispec2D_moho_bot, &
+ num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+ phase_ispec_inner_elastic )
+
+ case (10)
+ call compute_forces_elastic_Dev_10p(iphase, NSPEC_AB,NGLOB_AB, &
+ b_displ,b_accel, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool, &
+ ATTENUATION, &
+ one_minus_sum_beta,factor_common, &
+ b_alphaval,b_betaval,b_gammaval, &
+ NSPEC_ATTENUATION_AB, &
+ b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+ b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+ b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store, &
+ SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
+ NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+ is_moho_top,is_moho_bot, &
+ b_dsdx_top,b_dsdx_bot, &
+ ispec2D_moho_top,ispec2D_moho_bot, &
+ num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+ phase_ispec_inner_elastic )
+
+ case default
+
+ stop 'error no Deville routine available for chosen NGLLX'
+
+ end select
+
+
+end subroutine compute_forces_elastic_Dev_sim3
+
+
Deleted: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.f90 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.f90 2012-02-25 01:37:33 UTC (rev 19680)
@@ -1,857 +0,0 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-! elastic solver
-
-subroutine compute_forces_elastic()
-
- use specfem_par
- use specfem_par_acoustic
- use specfem_par_elastic
- use specfem_par_poroelastic
-
- implicit none
-
- integer:: iphase
- logical:: phase_is_inner
-
-! distinguishes two runs: for points on MPI interfaces, and points within the partitions
- do iphase=1,2
-
- !first for points on MPI interfaces
- if( iphase == 1 ) then
- phase_is_inner = .false.
- else
- phase_is_inner = .true.
- endif
-
-
-! elastic term
- if( .NOT. GPU_MODE ) then
- if(USE_DEVILLE_PRODUCTS) then
- ! uses Deville (2002) optimizations
- call compute_forces_elastic_Dev_sim1(iphase)
-
- ! adjoint simulations: backward/reconstructed wavefield
- if( SIMULATION_TYPE == 3 ) &
- call compute_forces_elastic_Dev_sim3(iphase)
-
- else
- ! no optimizations used
- call compute_forces_elastic_noDev( iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_yy,hprime_zz, &
- hprimewgll_xx,hprimewgll_yy,hprimewgll_zz,&
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- kappastore,mustore,jacobian,ibool, &
- ATTENUATION,&
- one_minus_sum_beta,factor_common, &
- alphaval,betaval,gammaval,&
- NSPEC_ATTENUATION_AB, &
- R_xx,R_yy,R_xy,R_xz,R_yz, &
- epsilondev_xx,epsilondev_yy,epsilondev_xy,&
- epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store, &
- SIMULATION_TYPE,COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
- NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
- is_moho_top,is_moho_bot, &
- dsdx_top,dsdx_bot, &
- ispec2D_moho_top,ispec2D_moho_bot, &
- num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
- phase_ispec_inner_elastic )
-
- ! adjoint simulations: backward/reconstructed wavefield
- if( SIMULATION_TYPE == 3 ) &
- call compute_forces_elastic_noDev( iphase, NSPEC_AB,NGLOB_AB,&
- b_displ,b_accel, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_yy,hprime_zz, &
- hprimewgll_xx,hprimewgll_yy,hprimewgll_zz,&
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- kappastore,mustore,jacobian,ibool, &
- ATTENUATION,&
- one_minus_sum_beta,factor_common, &
- b_alphaval,b_betaval,b_gammaval,&
- NSPEC_ATTENUATION_AB, &
- b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
- b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy,&
- b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store, &
- SIMULATION_TYPE,COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
- NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
- is_moho_top,is_moho_bot, &
- b_dsdx_top,b_dsdx_bot, &
- ispec2D_moho_top,ispec2D_moho_bot, &
- num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
- phase_ispec_inner_elastic )
-
- endif
-
- else
- ! on GPU
- ! contains both forward SIM_TYPE==1 and backward SIM_TYPE==3 simulations
- call compute_forces_elastic_cuda(Mesh_pointer, iphase, &
- nspec_outer_elastic, &
- nspec_inner_elastic, &
- SIMULATION_TYPE, &
- COMPUTE_AND_STORE_STRAIN,ATTENUATION,ANISOTROPY)
- endif ! GPU_MODE
-
-
-! adds elastic absorbing boundary term to acceleration (Stacey conditions)
- if(ABSORBING_CONDITIONS) &
- call compute_stacey_elastic(NSPEC_AB,NGLOB_AB,accel, &
- ibool,ispec_is_inner,phase_is_inner, &
- abs_boundary_normal,abs_boundary_jacobian2Dw, &
- abs_boundary_ijk,abs_boundary_ispec, &
- num_abs_boundary_faces, &
- veloc,rho_vp,rho_vs, &
- ispec_is_elastic,SIMULATION_TYPE,SAVE_FORWARD, &
- NSTEP,it,NGLOB_ADJOINT,b_accel, &
- b_num_abs_boundary_faces,b_reclen_field,b_absorb_field,&
- GPU_MODE,Mesh_pointer)
-
-! acoustic coupling
- if( ACOUSTIC_SIMULATION ) then
- if( .NOT. GPU_MODE ) then
- call compute_coupling_elastic_ac(NSPEC_AB,NGLOB_AB, &
- ibool,accel,potential_dot_dot_acoustic, &
- num_coupling_ac_el_faces, &
- coupling_ac_el_ispec,coupling_ac_el_ijk, &
- coupling_ac_el_normal, &
- coupling_ac_el_jacobian2Dw, &
- ispec_is_inner,phase_is_inner)
-
- ! adjoint simulations
- if( SIMULATION_TYPE == 3 ) &
- call compute_coupling_elastic_ac(NSPEC_ADJOINT,NGLOB_ADJOINT, &
- ibool,b_accel,b_potential_dot_dot_acoustic, &
- num_coupling_ac_el_faces, &
- coupling_ac_el_ispec,coupling_ac_el_ijk, &
- coupling_ac_el_normal, &
- coupling_ac_el_jacobian2Dw, &
- ispec_is_inner,phase_is_inner)
- else
- ! on GPU
- if( num_coupling_ac_el_faces > 0 ) &
- call compute_coupling_el_ac_cuda(Mesh_pointer,phase_is_inner, &
- num_coupling_ac_el_faces,SIMULATION_TYPE)
-
- endif
- endif
-
-
-! poroelastic coupling
-! not implemented yet
-! if( POROELASTIC_SIMULATION ) &
-! call compute_coupling_elastic_poro()
-
-! adds source term (single-force/moment-tensor solution)
- call compute_add_sources_elastic( NSPEC_AB,NGLOB_AB,accel, &
- ibool,ispec_is_inner,phase_is_inner, &
- NSOURCES,myrank,it,islice_selected_source,ispec_selected_source,&
- xi_source,eta_source,gamma_source,nu_source, &
- hdur,hdur_gaussian,tshift_cmt,dt,t0,sourcearrays, &
- ispec_is_elastic,SIMULATION_TYPE,NSTEP,NGLOB_ADJOINT, &
- nrec,islice_selected_rec,ispec_selected_rec, &
- nadj_rec_local,adj_sourcearrays,b_accel, &
- NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY, &
- GPU_MODE, Mesh_pointer )
-
- ! assemble all the contributions between slices using MPI
- if( phase_is_inner .eqv. .false. ) then
- ! sends accel values to corresponding MPI interface neighbors
- if(.NOT. GPU_MODE) then
- call assemble_MPI_vector_ext_mesh_s(NPROC,NGLOB_AB,accel, &
- buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
- num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
- my_neighbours_ext_mesh, &
- request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
- else ! GPU_MODE==1
- call transfer_boun_accel_from_device(NGLOB_AB*NDIM, Mesh_pointer, accel,&
- buffer_send_vector_ext_mesh,&
- num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh,&
- nibool_interfaces_ext_mesh, ibool_interfaces_ext_mesh,1) ! <-- 1 == fwd accel
- call assemble_MPI_vector_send_cuda(NPROC, &
- buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
- num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh,&
- my_neighbours_ext_mesh, &
- request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
- endif ! GPU_MODE
-
- ! adjoint simulations
- if( SIMULATION_TYPE == 3 ) then
- if(.NOT. GPU_MODE) then
- call assemble_MPI_vector_ext_mesh_s(NPROC,NGLOB_ADJOINT,b_accel, &
- b_buffer_send_vector_ext_mesh,b_buffer_recv_vector_ext_mesh, &
- num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
- my_neighbours_ext_mesh, &
- b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh)
- else ! GPU_MODE == 1
- call transfer_boun_accel_from_device(NGLOB_AB*NDIM, Mesh_pointer, b_accel,&
- b_buffer_send_vector_ext_mesh,&
- num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh,&
- nibool_interfaces_ext_mesh, ibool_interfaces_ext_mesh,3) ! <-- 3 == adjoint b_accel
- call assemble_MPI_vector_send_cuda(NPROC, &
- b_buffer_send_vector_ext_mesh,b_buffer_recv_vector_ext_mesh, &
- num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh,&
- my_neighbours_ext_mesh, &
- b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh)
-
- endif ! GPU
- endif !adjoint
-
- else
- ! waits for send/receive requests to be completed and assembles values
- if(.NOT. GPU_MODE) then
- call assemble_MPI_vector_ext_mesh_w(NPROC,NGLOB_AB,accel, &
- buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
- max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
- request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
- else ! GPU_MODE == 1
- call assemble_MPI_vector_write_cuda(NPROC,NGLOB_AB,accel, Mesh_pointer,&
- buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
- max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
- request_send_vector_ext_mesh,request_recv_vector_ext_mesh,1)
- endif
- ! adjoint simulations
- if( SIMULATION_TYPE == 3 ) then
- if(.NOT. GPU_MODE) then
- call assemble_MPI_vector_ext_mesh_w(NPROC,NGLOB_ADJOINT,b_accel, &
- b_buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
- max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
- b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh)
- else ! GPU_MODE == 1
- call assemble_MPI_vector_write_cuda(NPROC,NGLOB_AB,b_accel, Mesh_pointer,&
- b_buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
- max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
- b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh,3)
- endif
- endif !adjoint
-
- endif
-
- !! DK DK May 2009: removed this because now each slice of a CUBIT + SCOTCH mesh
- !! DK DK May 2009: has a different number of spectral elements and therefore
- !! DK DK May 2009: only the general non-blocking MPI routines assemble_MPI_vector_ext_mesh_s
- !! DK DK May 2009: and assemble_MPI_vector_ext_mesh_w above can be used.
- !! DK DK May 2009: For adjoint runs below (SIMULATION_TYPE == 3) they should be used as well.
-
- enddo
-
- ! multiplies with inverse of mass matrix (note: rmass has been inverted already)
- if(.NOT. GPU_MODE) then
- accel(1,:) = accel(1,:)*rmass(:)
- accel(2,:) = accel(2,:)*rmass(:)
- accel(3,:) = accel(3,:)*rmass(:)
- ! adjoint simulations
- if (SIMULATION_TYPE == 3) then
- b_accel(1,:) = b_accel(1,:)*rmass(:)
- b_accel(2,:) = b_accel(2,:)*rmass(:)
- b_accel(3,:) = b_accel(3,:)*rmass(:)
- endif !adjoint
- else ! GPU_MODE == 1
- call kernel_3_a_cuda(Mesh_pointer, NGLOB_AB, deltatover2,SIMULATION_TYPE,b_deltatover2,OCEANS)
- endif
-
-! updates acceleration with ocean load term
- if(OCEANS) then
- if( .NOT. GPU_MODE ) then
- call elastic_ocean_load(NSPEC_AB,NGLOB_AB, &
- ibool,rmass,rmass_ocean_load,accel, &
- free_surface_normal,free_surface_ijk,free_surface_ispec, &
- num_free_surface_faces,SIMULATION_TYPE, &
- NGLOB_ADJOINT,b_accel)
- else
- ! on GPU
- call elastic_ocean_load_cuda(Mesh_pointer,SIMULATION_TYPE)
- endif
- endif
-
-! updates velocities
-! Newmark finite-difference time scheme with elastic domains:
-! (see e.g. Hughes, 1987; Chaljub et al., 2003)
-!
-! u(t+delta_t) = u(t) + delta_t v(t) + 1/2 delta_t**2 a(t)
-! v(t+delta_t) = v(t) + 1/2 delta_t a(t) + 1/2 delta_t a(t+delta_t)
-! a(t+delta_t) = 1/M_elastic ( -K_elastic u(t+delta) + B_elastic chi_dot_dot(t+delta_t) + f( t+delta_t) )
-!
-! where
-! u, v, a are displacement,velocity & acceleration
-! M is mass matrix, K stiffness matrix and B boundary term for acoustic/elastic domains
-! f denotes a source term (acoustic/elastic)
-! chi_dot_dot is acoustic (fluid) potential ( dotted twice with respect to time)
-!
-! corrector:
-! updates the velocity term which requires a(t+delta)
-! GPU_MODE: this is handled in 'kernel_3' at the same time as accel*rmass
- if(.NOT. GPU_MODE) then
- veloc(:,:) = veloc(:,:) + deltatover2*accel(:,:)
- ! adjoint simulations
- if (SIMULATION_TYPE == 3) b_veloc(:,:) = b_veloc(:,:) + b_deltatover2*b_accel(:,:)
- else ! GPU_MODE == 1
- if( OCEANS ) call kernel_3_b_cuda(Mesh_pointer, NGLOB_AB, deltatover2,SIMULATION_TYPE,b_deltatover2)
- endif
-
-
-end subroutine compute_forces_elastic
-
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-subroutine elastic_ocean_load(NSPEC_AB,NGLOB_AB, &
- ibool,rmass,rmass_ocean_load,accel, &
- free_surface_normal,free_surface_ijk,free_surface_ispec, &
- num_free_surface_faces,SIMULATION_TYPE, &
- NGLOB_ADJOINT,b_accel)
-
-! updates acceleration with ocean load term:
-! approximates ocean-bottom continuity of pressure & displacement for longer period waves (> ~20s ),
-! assuming incompressible fluid column above bathymetry ocean bottom
-
- implicit none
-
- include 'constants.h'
-
- integer :: NSPEC_AB,NGLOB_AB
-
- real(kind=CUSTOM_REAL),dimension(NDIM,NGLOB_AB),intent(inout) :: accel
- real(kind=CUSTOM_REAL),dimension(NGLOB_AB),intent(in) :: rmass,rmass_ocean_load
-
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB),intent(in) :: ibool
-
- ! free surface
- integer :: num_free_surface_faces
- real(kind=CUSTOM_REAL) :: free_surface_normal(NDIM,NGLLSQUARE,num_free_surface_faces)
- integer :: free_surface_ijk(3,NGLLSQUARE,num_free_surface_faces)
- integer :: free_surface_ispec(num_free_surface_faces)
-
- ! adjoint simulations
- integer :: SIMULATION_TYPE,NGLOB_ADJOINT
- real(kind=CUSTOM_REAL),dimension(NDIM,NGLOB_ADJOINT):: b_accel
-
-! local parameters
- real(kind=CUSTOM_REAL) :: nx,ny,nz
- real(kind=CUSTOM_REAL) :: additional_term,force_normal_comp
- integer :: i,j,k,ispec,iglob
- integer :: igll,iface
- logical,dimension(NGLOB_AB) :: updated_dof_ocean_load
- ! adjoint locals
- real(kind=CUSTOM_REAL) :: b_additional_term,b_force_normal_comp
-
- ! initialize the updates
- updated_dof_ocean_load(:) = .false.
-
- ! for surface elements exactly at the top of the model (ocean bottom)
- do iface = 1,num_free_surface_faces
-
- ispec = free_surface_ispec(iface)
- do igll = 1, NGLLSQUARE
- i = free_surface_ijk(1,igll,iface)
- j = free_surface_ijk(2,igll,iface)
- k = free_surface_ijk(3,igll,iface)
-
- ! get global point number
- iglob = ibool(i,j,k,ispec)
-
- ! only update once
- if(.not. updated_dof_ocean_load(iglob)) then
-
- ! get normal
- nx = free_surface_normal(1,igll,iface)
- ny = free_surface_normal(2,igll,iface)
- nz = free_surface_normal(3,igll,iface)
-
- ! make updated component of right-hand side
- ! we divide by rmass() which is 1 / M
- ! we use the total force which includes the Coriolis term above
- force_normal_comp = ( accel(1,iglob)*nx + &
- accel(2,iglob)*ny + &
- accel(3,iglob)*nz ) / rmass(iglob)
-
- additional_term = (rmass_ocean_load(iglob) - rmass(iglob)) * force_normal_comp
-
- accel(1,iglob) = accel(1,iglob) + additional_term * nx
- accel(2,iglob) = accel(2,iglob) + additional_term * ny
- accel(3,iglob) = accel(3,iglob) + additional_term * nz
-
- ! adjoint simulations
- if (SIMULATION_TYPE == 3) then
- b_force_normal_comp = ( b_accel(1,iglob)*nx + &
- b_accel(2,iglob)*ny + &
- b_accel(3,iglob)*nz) / rmass(iglob)
- b_additional_term = (rmass_ocean_load(iglob) - rmass(iglob)) * b_force_normal_comp
-
- b_accel(1,iglob) = b_accel(1,iglob) + b_additional_term * nx
- b_accel(2,iglob) = b_accel(2,iglob) + b_additional_term * ny
- b_accel(3,iglob) = b_accel(3,iglob) + b_additional_term * nz
- endif !adjoint
-
- ! done with this point
- updated_dof_ocean_load(iglob) = .true.
-
- endif
-
- enddo ! igll
- enddo ! iface
-
-end subroutine elastic_ocean_load
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-! distributes routines according to chosen NGLLX in constants.h
-
-!daniel: note -- i put it here rather than in compute_forces_elastic_Dev.f90 because compiler complains that:
-! " The storage extent of the dummy argument exceeds that of the actual argument. "
-
-subroutine compute_forces_elastic_Dev_sim1(iphase)
-
-! forward simulations
-
- use specfem_par
- use specfem_par_acoustic
- use specfem_par_elastic
- use specfem_par_poroelastic
-
- implicit none
-
- integer,intent(in) :: iphase
- logical OPENMP_MODE
- OPENMP_MODE = .false.
-
- ! write(*,*) "num_elem_colors_elastic(1)=",num_elem_colors_elastic(1)
-
- select case(NGLLX)
-
- case (5)
- if(OPENMP_MODE) then
- call compute_forces_elastic_Dev_openmp(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- kappastore,mustore,jacobian,ibool, &
- ATTENUATION, &
- one_minus_sum_beta,factor_common, &
- alphaval,betaval,gammaval, &
- NSPEC_ATTENUATION_AB, &
- R_xx,R_yy,R_xy,R_xz,R_yz, &
- epsilondev_xx,epsilondev_yy,epsilondev_xy, &
- epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store, &
- SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
- NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
- is_moho_top,is_moho_bot, &
- dsdx_top,dsdx_bot, &
- ispec2D_moho_top,ispec2D_moho_bot, &
- num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
- phase_ispec_inner_elastic,&
- num_colors_outer_elastic,num_colors_inner_elastic,&
- num_elem_colors_elastic,&
- dummyx_loc,dummyy_loc,dummyz_loc,newtempx1,newtempx2,newtempx3,&
- newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3,&
- tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3)
- else
-
- call compute_forces_elastic_Dev_5p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- kappastore,mustore,jacobian,ibool, &
- ATTENUATION, &
- one_minus_sum_beta,factor_common, &
- alphaval,betaval,gammaval, &
- NSPEC_ATTENUATION_AB, &
- R_xx,R_yy,R_xy,R_xz,R_yz, &
- epsilondev_xx,epsilondev_yy,epsilondev_xy, &
- epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store, &
- SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
- NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
- is_moho_top,is_moho_bot, &
- dsdx_top,dsdx_bot, &
- ispec2D_moho_top,ispec2D_moho_bot, &
- num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
- phase_ispec_inner_elastic )
- endif
-
- case (6)
- call compute_forces_elastic_Dev_6p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- kappastore,mustore,jacobian,ibool, &
- ATTENUATION, &
- one_minus_sum_beta,factor_common, &
- alphaval,betaval,gammaval, &
- NSPEC_ATTENUATION_AB, &
- R_xx,R_yy,R_xy,R_xz,R_yz, &
- epsilondev_xx,epsilondev_yy,epsilondev_xy, &
- epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store, &
- SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
- NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
- is_moho_top,is_moho_bot, &
- dsdx_top,dsdx_bot, &
- ispec2D_moho_top,ispec2D_moho_bot, &
- num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
- phase_ispec_inner_elastic )
-
- case (7)
- call compute_forces_elastic_Dev_7p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- kappastore,mustore,jacobian,ibool, &
- ATTENUATION, &
- one_minus_sum_beta,factor_common, &
- alphaval,betaval,gammaval, &
- NSPEC_ATTENUATION_AB, &
- R_xx,R_yy,R_xy,R_xz,R_yz, &
- epsilondev_xx,epsilondev_yy,epsilondev_xy, &
- epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store, &
- SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
- NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
- is_moho_top,is_moho_bot, &
- dsdx_top,dsdx_bot, &
- ispec2D_moho_top,ispec2D_moho_bot, &
- num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
- phase_ispec_inner_elastic )
-
- case (8)
- call compute_forces_elastic_Dev_8p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- kappastore,mustore,jacobian,ibool, &
- ATTENUATION, &
- one_minus_sum_beta,factor_common, &
- alphaval,betaval,gammaval, &
- NSPEC_ATTENUATION_AB, &
- R_xx,R_yy,R_xy,R_xz,R_yz, &
- epsilondev_xx,epsilondev_yy,epsilondev_xy, &
- epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store, &
- SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
- NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
- is_moho_top,is_moho_bot, &
- dsdx_top,dsdx_bot, &
- ispec2D_moho_top,ispec2D_moho_bot, &
- num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
- phase_ispec_inner_elastic )
-
- case (9)
- call compute_forces_elastic_Dev_9p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- kappastore,mustore,jacobian,ibool, &
- ATTENUATION, &
- one_minus_sum_beta,factor_common, &
- alphaval,betaval,gammaval, &
- NSPEC_ATTENUATION_AB, &
- R_xx,R_yy,R_xy,R_xz,R_yz, &
- epsilondev_xx,epsilondev_yy,epsilondev_xy, &
- epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store, &
- SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
- NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
- is_moho_top,is_moho_bot, &
- dsdx_top,dsdx_bot, &
- ispec2D_moho_top,ispec2D_moho_bot, &
- num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
- phase_ispec_inner_elastic )
-
- case (10)
- call compute_forces_elastic_Dev_10p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- kappastore,mustore,jacobian,ibool, &
- ATTENUATION, &
- one_minus_sum_beta,factor_common, &
- alphaval,betaval,gammaval, &
- NSPEC_ATTENUATION_AB, &
- R_xx,R_yy,R_xy,R_xz,R_yz, &
- epsilondev_xx,epsilondev_yy,epsilondev_xy, &
- epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store, &
- SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
- NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
- is_moho_top,is_moho_bot, &
- dsdx_top,dsdx_bot, &
- ispec2D_moho_top,ispec2D_moho_bot, &
- num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
- phase_ispec_inner_elastic )
-
- case default
-
- stop 'error no Deville routine available for chosen NGLLX'
-
- end select
-
-end subroutine compute_forces_elastic_Dev_sim1
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-
-subroutine compute_forces_elastic_Dev_sim3(iphase)
-
-! uses backward/reconstructed displacement and acceleration arrays
-
- use specfem_par
- use specfem_par_acoustic
- use specfem_par_elastic
- use specfem_par_poroelastic
-
- implicit none
-
- integer,intent(in) :: iphase
-
- select case(NGLLX)
-
- case (5)
- call compute_forces_elastic_Dev_5p(iphase, NSPEC_AB,NGLOB_AB, &
- b_displ,b_accel, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- kappastore,mustore,jacobian,ibool, &
- ATTENUATION, &
- one_minus_sum_beta,factor_common, &
- b_alphaval,b_betaval,b_gammaval, &
- NSPEC_ATTENUATION_AB, &
- b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
- b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
- b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store, &
- SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
- NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
- is_moho_top,is_moho_bot, &
- b_dsdx_top,b_dsdx_bot, &
- ispec2D_moho_top,ispec2D_moho_bot, &
- num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
- phase_ispec_inner_elastic )
-
- case (6)
- call compute_forces_elastic_Dev_6p(iphase, NSPEC_AB,NGLOB_AB, &
- b_displ,b_accel, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- kappastore,mustore,jacobian,ibool, &
- ATTENUATION, &
- one_minus_sum_beta,factor_common, &
- b_alphaval,b_betaval,b_gammaval, &
- NSPEC_ATTENUATION_AB, &
- b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
- b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
- b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store, &
- SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
- NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
- is_moho_top,is_moho_bot, &
- b_dsdx_top,b_dsdx_bot, &
- ispec2D_moho_top,ispec2D_moho_bot, &
- num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
- phase_ispec_inner_elastic )
-
- case (7)
- call compute_forces_elastic_Dev_7p(iphase, NSPEC_AB,NGLOB_AB, &
- b_displ,b_accel, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- kappastore,mustore,jacobian,ibool, &
- ATTENUATION, &
- one_minus_sum_beta,factor_common, &
- b_alphaval,b_betaval,b_gammaval, &
- NSPEC_ATTENUATION_AB, &
- b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
- b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
- b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store, &
- SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
- NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
- is_moho_top,is_moho_bot, &
- b_dsdx_top,b_dsdx_bot, &
- ispec2D_moho_top,ispec2D_moho_bot, &
- num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
- phase_ispec_inner_elastic )
-
- case (8)
- call compute_forces_elastic_Dev_8p(iphase, NSPEC_AB,NGLOB_AB, &
- b_displ,b_accel, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- kappastore,mustore,jacobian,ibool, &
- ATTENUATION, &
- one_minus_sum_beta,factor_common, &
- b_alphaval,b_betaval,b_gammaval, &
- NSPEC_ATTENUATION_AB, &
- b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
- b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
- b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store, &
- SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
- NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
- is_moho_top,is_moho_bot, &
- b_dsdx_top,b_dsdx_bot, &
- ispec2D_moho_top,ispec2D_moho_bot, &
- num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
- phase_ispec_inner_elastic )
-
- case (9)
- call compute_forces_elastic_Dev_9p(iphase, NSPEC_AB,NGLOB_AB, &
- b_displ,b_accel, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- kappastore,mustore,jacobian,ibool, &
- ATTENUATION, &
- one_minus_sum_beta,factor_common, &
- b_alphaval,b_betaval,b_gammaval, &
- NSPEC_ATTENUATION_AB, &
- b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
- b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
- b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store, &
- SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
- NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
- is_moho_top,is_moho_bot, &
- b_dsdx_top,b_dsdx_bot, &
- ispec2D_moho_top,ispec2D_moho_bot, &
- num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
- phase_ispec_inner_elastic )
-
- case (10)
- call compute_forces_elastic_Dev_10p(iphase, NSPEC_AB,NGLOB_AB, &
- b_displ,b_accel, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- kappastore,mustore,jacobian,ibool, &
- ATTENUATION, &
- one_minus_sum_beta,factor_common, &
- b_alphaval,b_betaval,b_gammaval, &
- NSPEC_ATTENUATION_AB, &
- b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
- b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
- b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store, &
- SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
- NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
- is_moho_top,is_moho_bot, &
- b_dsdx_top,b_dsdx_bot, &
- ispec2D_moho_top,ispec2D_moho_bot, &
- num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
- phase_ispec_inner_elastic )
-
- case default
-
- stop 'error no Deville routine available for chosen NGLLX'
-
- end select
-
-
-end subroutine compute_forces_elastic_Dev_sim3
-
-
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic_Dev_openmp.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic_Dev_openmp.f90 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic_Dev_openmp.f90 2012-02-25 01:37:33 UTC (rev 19680)
@@ -26,32 +26,32 @@
! OpenMP Threaded variant by Max Rietmann and Olaf Schenk
-subroutine compute_forces_elastic_Dev_openmp(iphase ,NSPEC_AB,NGLOB_AB, &
- displ,accel, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_xxT, &
- hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- kappastore,mustore,jacobian,ibool, &
- ATTENUATION, &
- one_minus_sum_beta,factor_common,alphaval,betaval,gammaval,&
- NSPEC_ATTENUATION_AB, &
- R_xx,R_yy,R_xy,R_xz,R_yz, &
- epsilondev_xx,epsilondev_yy,epsilondev_xy, &
- epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store, &
- SIMULATION_TYPE,COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
- NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT, &
- is_moho_top,is_moho_bot, &
- dsdx_top,dsdx_bot, &
- ispec2D_moho_top,ispec2D_moho_bot, &
- num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
- phase_ispec_inner_elastic,&
- num_colors_outer_elastic,num_colors_inner_elastic)
+ subroutine compute_forces_elastic_Dev_openmp(iphase ,NSPEC_AB,NGLOB_AB, &
+ displ,accel, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_xxT, &
+ hprimewgll_xx,hprimewgll_xxT, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool, &
+ ATTENUATION, &
+ one_minus_sum_beta,factor_common,alphaval,betaval,gammaval,&
+ NSPEC_ATTENUATION_AB, &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+ epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store, &
+ SIMULATION_TYPE,COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+ NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT, &
+ is_moho_top,is_moho_bot, &
+ dsdx_top,dsdx_bot, &
+ ispec2D_moho_top,ispec2D_moho_bot, &
+ num_phase_ispec_elastic,&
+ phase_ispec_inner_elastic,&
+ num_colors_outer_elastic,num_colors_inner_elastic)
@@ -63,7 +63,7 @@
! Trying to pass these variables as subroutine arguments ran into
! problems, so we reference them from their module, making them
- ! accessible from this subroutine
+ ! accessible from this subroutine
use specfem_par_elastic, only:dummyx_loc,dummyy_loc,dummyz_loc,newtempx1,newtempx2,newtempx3,&
newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3,&
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3,num_elem_colors_elastic
@@ -115,7 +115,7 @@
c55store,c56store,c66store
integer :: iphase
- integer :: num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic
+ integer :: num_phase_ispec_elastic
integer, dimension(num_phase_ispec_elastic,2) :: phase_ispec_inner_elastic
! adjoint simulations
@@ -151,12 +151,14 @@
integer OMP_get_thread_num
integer OMP_GET_MAX_THREADS
- double precision omp_get_wtime
- double precision start_time
- double precision end_time
- double precision accumulate_time_start
- double precision accumulate_time_stop
+ ! timing
+ !double precision omp_get_wtime
+ !double precision start_time
+ !double precision end_time
+ !double precision accumulate_time_start
+ !double precision accumulate_time_stop
+
! local anisotropy parameters
real(kind=CUSTOM_REAL) c11,c12,c13,c14,c15,c16,c22,c23,c24,c25,c26,&
c33,c34,c35,c36,c44,c45,c46,c55,c56,c66
@@ -166,7 +168,7 @@
integer i,j,k
integer thread_id
integer NUM_THREADS
- integer omp_get_num_threads ! function
+ !integer omp_get_num_threads ! function
! coloring additions
! integer, dimension(:), allocatable :: num_elem_colors_elastic
@@ -174,8 +176,9 @@
integer num_colors_outer_elastic, num_colors_inner_elastic
integer icolor
- ! write(*,*) "num_elem_colors_elastic(1)=",num_elem_colors_elastic(1)
+ ! write(*,*) "num_elem_colors_elastic(1) = ",num_elem_colors_elastic(1)
imodulo_N_SLS = mod(N_SLS,3)
+
! NUM_THREADS = 1
NUM_THREADS = OMP_GET_MAX_THREADS()
@@ -188,8 +191,8 @@
number_of_colors = num_colors_inner_elastic + num_colors_outer_elastic
istart = num_colors_outer_elastic+1
! istart = num_colors_outer_elastic
+ endif
- endif
! "start" timer
! start_time = omp_get_wtime()
@@ -201,8 +204,8 @@
! order, stopping to synchronize threads after all the elements in a
! color are finished.
estart = 1
- do icolor = istart, number_of_colors
-
+ do icolor = istart, number_of_colors
+
!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(&
!$OMP R_xx_val1,R_yy_val1,R_xx_val2,R_yy_val2,R_xx_val3,R_yy_val3,&
!$OMP factor_loc,alphaval_loc,betaval_loc,gammaval_loc,&
@@ -647,11 +650,20 @@
! Assembly of shared degrees of freedom fixed through mesh coloring
!! !$OMP ATOMIC
- accel(1,iglob) = accel(1,iglob) - (fac1*newtempx1(i,j,k,thread_id) + fac2*newtempx2(i,j,k,thread_id) + fac3*newtempx3(i,j,k,thread_id))
+ accel(1,iglob) = accel(1,iglob) &
+ - (fac1*newtempx1(i,j,k,thread_id) &
+ + fac2*newtempx2(i,j,k,thread_id) &
+ + fac3*newtempx3(i,j,k,thread_id))
!! !$OMP ATOMIC
- accel(2,iglob) = accel(2,iglob) - (fac1*newtempy1(i,j,k,thread_id) + fac2*newtempy2(i,j,k,thread_id) + fac3*newtempy3(i,j,k,thread_id))
+ accel(2,iglob) = accel(2,iglob) &
+ - (fac1*newtempy1(i,j,k,thread_id) &
+ + fac2*newtempy2(i,j,k,thread_id) &
+ + fac3*newtempy3(i,j,k,thread_id))
!! !$OMP ATOMIC
- accel(3,iglob) = accel(3,iglob) - (fac1*newtempz1(i,j,k,thread_id) + fac2*newtempz2(i,j,k,thread_id) + fac3*newtempz3(i,j,k,thread_id))
+ accel(3,iglob) = accel(3,iglob) &
+ - (fac1*newtempz1(i,j,k,thread_id) &
+ + fac2*newtempz2(i,j,k,thread_id) &
+ + fac3*newtempz3(i,j,k,thread_id))
! accel(1,iglob) = accel(1,iglob) - &
! (fac1*newtempx1(i,j,k,thread_id) + fac2*newtempx2(i,j,k,thread_id) + fac3*newtempx3(i,j,k,thread_id))
@@ -732,20 +744,19 @@
! color.
estart = estart + num_elements
- enddo ! loop over colors
-
+ enddo ! loop over colors
-
-! "stop" timer
-! end_time = omp_get_wtime()
-! write(*,*) "Total Elapsed time: ", (end_time-start_time) , "seconds. (Threads=",NUM_THREADS,")"
-! write(*,*) "Accumulate Elapsed time: ", (accumulate_time_stop-accumulate_time_start) , "seconds"
+ ! "stop" timer
+ ! end_time = omp_get_wtime()
+ ! write(*,*) "Total Elapsed time: ", (end_time-start_time) , "seconds. (Threads=",NUM_THREADS,")"
+ ! write(*,*) "Accumulate Elapsed time: ", (accumulate_time_stop-accumulate_time_start) , "seconds"
+
! These are now allocated at the beginning and never deallocated
! because the program just finishes at the end.
-
+
! deallocate(dummyx_loc)
! deallocate(dummyy_loc)
! deallocate(dummyz_loc)
@@ -767,9 +778,9 @@
! deallocate(tempz1)
! deallocate(tempz2)
! deallocate(tempz3)
-
-! accel(:,:) = accel_omp(:,:,1)
-end subroutine compute_forces_elastic_Dev_openmp
+ ! accel(:,:) = accel_omp(:,:,1)
+ end subroutine compute_forces_elastic_Dev_openmp
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_kernels.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_kernels.f90 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_kernels.f90 2012-02-25 01:37:33 UTC (rev 19680)
@@ -69,88 +69,82 @@
integer :: i,j,k,ispec,iglob
real(kind=CUSTOM_REAL), dimension(5) :: epsilondev_loc,b_epsilondev_loc
- ! updates kernels on GPU
- if(GPU_MODE) then
- call compute_kernels_elastic_cuda(Mesh_pointer,deltat)
+ if( .not. GPU_MODE ) then
+ ! updates kernels on CPU
+ do ispec = 1, NSPEC_AB
- ! for noise simulations --- source strength kernel
- if (NOISE_TOMOGRAPHY == 3) &
- call compute_kernels_strength_noise(NGLLSQUARE*num_free_surface_faces,ibool, &
- sigma_kl,displ,deltat,it, &
- normal_x_noise,normal_y_noise,normal_z_noise, &
- noise_surface_movie, &
- NSPEC_AB,NGLOB_AB, &
- num_free_surface_faces,free_surface_ispec,free_surface_ijk,&
- GPU_MODE,Mesh_pointer)
+ ! elastic domains
+ if( ispec_is_elastic(ispec) ) then
- ! kernels are done
- return
- endif
+ do k = 1, NGLLZ
+ do j = 1, NGLLY
+ do i = 1, NGLLX
+ iglob = ibool(i,j,k,ispec)
- ! updates kernels on CPU
- do ispec = 1, NSPEC_AB
+ ! isotropic kernels
+ ! note: takes displacement from backward/reconstructed (forward) field b_displ
+ ! and acceleration from adjoint field accel (containing adjoint sources)
+ !
+ ! note: : time integral summation uses deltat
+ !
+ ! compare with Tromp et al. (2005), eq. (14), which takes adjoint displacement
+ ! and forward acceleration, that is the symmetric form of what is calculated here
+ ! however, this kernel expression is symmetric with regards
+ ! to interchange adjoint - forward field
+ rho_kl(i,j,k,ispec) = rho_kl(i,j,k,ispec) &
+ + deltat * dot_product(accel(:,iglob), b_displ(:,iglob))
- ! elastic domains
- if( ispec_is_elastic(ispec) ) then
+ ! kernel for shear modulus, see e.g. Tromp et al. (2005), equation (17)
+ ! note: multiplication with 2*mu(x) will be done after the time loop
+ epsilondev_loc(1) = epsilondev_xx(i,j,k,ispec)
+ epsilondev_loc(2) = epsilondev_yy(i,j,k,ispec)
+ epsilondev_loc(3) = epsilondev_xy(i,j,k,ispec)
+ epsilondev_loc(4) = epsilondev_xz(i,j,k,ispec)
+ epsilondev_loc(5) = epsilondev_yz(i,j,k,ispec)
- do k = 1, NGLLZ
- do j = 1, NGLLY
- do i = 1, NGLLX
- iglob = ibool(i,j,k,ispec)
+ b_epsilondev_loc(1) = b_epsilondev_xx(i,j,k,ispec)
+ b_epsilondev_loc(2) = b_epsilondev_yy(i,j,k,ispec)
+ b_epsilondev_loc(3) = b_epsilondev_xy(i,j,k,ispec)
+ b_epsilondev_loc(4) = b_epsilondev_xz(i,j,k,ispec)
+ b_epsilondev_loc(5) = b_epsilondev_yz(i,j,k,ispec)
- ! isotropic kernels
- ! note: takes displacement from backward/reconstructed (forward) field b_displ
- ! and acceleration from adjoint field accel (containing adjoint sources)
- !
- ! note: : time integral summation uses deltat
- !
- ! compare with Tromp et al. (2005), eq. (14), which takes adjoint displacement
- ! and forward acceleration, that is the symmetric form of what is calculated here
- ! however, this kernel expression is symmetric with regards
- ! to interchange adjoint - forward field
- rho_kl(i,j,k,ispec) = rho_kl(i,j,k,ispec) &
- + deltat * dot_product(accel(:,iglob), b_displ(:,iglob))
+ mu_kl(i,j,k,ispec) = mu_kl(i,j,k,ispec) &
+ + deltat * (epsilondev_loc(1)*b_epsilondev_loc(1) + epsilondev_loc(2)*b_epsilondev_loc(2) &
+ + (epsilondev_loc(1)+epsilondev_loc(2)) * (b_epsilondev_loc(1)+b_epsilondev_loc(2)) &
+ + 2 * (epsilondev_loc(3)*b_epsilondev_loc(3) + epsilondev_loc(4)*b_epsilondev_loc(4) + &
+ epsilondev_loc(5)*b_epsilondev_loc(5)) )
- ! kernel for shear modulus, see e.g. Tromp et al. (2005), equation (17)
- ! note: multiplication with 2*mu(x) will be done after the time loop
- epsilondev_loc(1) = epsilondev_xx(i,j,k,ispec)
- epsilondev_loc(2) = epsilondev_yy(i,j,k,ispec)
- epsilondev_loc(3) = epsilondev_xy(i,j,k,ispec)
- epsilondev_loc(4) = epsilondev_xz(i,j,k,ispec)
- epsilondev_loc(5) = epsilondev_yz(i,j,k,ispec)
+ ! kernel for bulk modulus, see e.g. Tromp et al. (2005), equation (18)
+ ! note: multiplication with kappa(x) will be done after the time loop
+ kappa_kl(i,j,k,ispec) = kappa_kl(i,j,k,ispec) &
+ + deltat * (9 * epsilon_trace_over_3(i,j,k,ispec) &
+ * b_epsilon_trace_over_3(i,j,k,ispec))
- b_epsilondev_loc(1) = b_epsilondev_xx(i,j,k,ispec)
- b_epsilondev_loc(2) = b_epsilondev_yy(i,j,k,ispec)
- b_epsilondev_loc(3) = b_epsilondev_xy(i,j,k,ispec)
- b_epsilondev_loc(4) = b_epsilondev_xz(i,j,k,ispec)
- b_epsilondev_loc(5) = b_epsilondev_yz(i,j,k,ispec)
+ enddo
+ enddo
+ enddo
+ endif !ispec_is_elastic
- mu_kl(i,j,k,ispec) = mu_kl(i,j,k,ispec) &
- + deltat * (epsilondev_loc(1)*b_epsilondev_loc(1) + epsilondev_loc(2)*b_epsilondev_loc(2) &
- + (epsilondev_loc(1)+epsilondev_loc(2)) * (b_epsilondev_loc(1)+b_epsilondev_loc(2)) &
- + 2 * (epsilondev_loc(3)*b_epsilondev_loc(3) + epsilondev_loc(4)*b_epsilondev_loc(4) + &
- epsilondev_loc(5)*b_epsilondev_loc(5)) )
+ enddo
- ! kernel for bulk modulus, see e.g. Tromp et al. (2005), equation (18)
- ! note: multiplication with kappa(x) will be done after the time loop
- kappa_kl(i,j,k,ispec) = kappa_kl(i,j,k,ispec) &
- + deltat * (9 * epsilon_trace_over_3(i,j,k,ispec) &
- * b_epsilon_trace_over_3(i,j,k,ispec))
+ else
+ ! updates kernels on GPU
+ call compute_kernels_elastic_cuda(Mesh_pointer,deltat)
+ endif
- enddo
- enddo
- enddo
- endif !ispec_is_elastic
- enddo
-
! moho kernel
if( SAVE_MOHO_MESH ) then
- call compute_boundary_kernel()
+ if( GPU_MODE ) then
+ call transfer_accel_from_device(NDIM*NGLOB_AB,accel,Mesh_pointer)
+ call transfer_b_displ_from_device(NDIM*NGLOB_AB,b_displ,Mesh_pointer)
+ endif
+ ! updates on CPU
+ call compute_boundary_kernel()
endif
! for noise simulations --- source strength kernel
- if (NOISE_TOMOGRAPHY == 3) &
+ if (NOISE_TOMOGRAPHY == 3) then
call compute_kernels_strength_noise(NGLLSQUARE*num_free_surface_faces,ibool, &
sigma_kl,displ,deltat,it, &
normal_x_noise,normal_y_noise,normal_z_noise, &
@@ -158,6 +152,7 @@
NSPEC_AB,NGLOB_AB, &
num_free_surface_faces,free_surface_ispec,free_surface_ijk,&
GPU_MODE,Mesh_pointer)
+ endif
end subroutine compute_kernels_el
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90 2012-02-25 01:37:33 UTC (rev 19680)
@@ -264,20 +264,20 @@
NZ_IMAGE_color = NZ_IMAGE_color * zoom_factor
zoom = .true.
endif
-
+
! create all the pixels
if( NX_IMAGE_color /= 0 ) then
size_pixel_horizontal = (xmax_color_image - xmin_color_image) / dble(NX_IMAGE_color)
else
size_pixel_horizontal = 0.0
endif
-
+
if( NZ_IMAGE_color /= 0 ) then
size_pixel_vertical = (zmax_color_image - zmin_color_image) / dble(NZ_IMAGE_color)
else
size_pixel_vertical = 0.0
endif
-
+
if (myrank == 0) then
write(IMAIN,*) ' image points: ',npgeo_glob
write(IMAIN,*) ' xmin/xmax: ',xmin_color_image,'/',xmax_color_image
@@ -309,7 +309,7 @@
distance_z1 = 0.0
distance_z2 = 2.0*size_pixel_vertical
endif
-
+
do j=1,NZ_IMAGE_color
do i=1,NX_IMAGE_color
! calculates midpoint of pixel
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/initialize_simulation.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/initialize_simulation.f90 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/initialize_simulation.f90 2012-02-25 01:37:33 UTC (rev 19680)
@@ -149,13 +149,13 @@
gammaz(NGLLX,NGLLY,NGLLZ,NSPEC_AB), &
jacobian(NGLLX,NGLLY,NGLLZ,NSPEC_AB),stat=ier)
if( ier /= 0 ) stop 'error allocating arrays for databases'
-
+
! mesh node locations
allocate(xstore(NGLOB_AB), &
ystore(NGLOB_AB), &
zstore(NGLOB_AB),stat=ier)
- if( ier /= 0 ) stop 'error allocating arrays for mesh nodes'
-
+ if( ier /= 0 ) stop 'error allocating arrays for mesh nodes'
+
! material properties
allocate(kappastore(NGLLX,NGLLY,NGLLZ,NSPEC_AB), &
mustore(NGLLX,NGLLY,NGLLZ,NSPEC_AB),stat=ier)
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/iterate_time.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/iterate_time.f90 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/iterate_time.f90 2012-02-25 01:37:33 UTC (rev 19680)
@@ -91,7 +91,7 @@
endif
! write the seismograms with time shift (GPU_MODE transfer included)
- if (nrec_local > 0) then
+ if (nrec_local > 0 .or. ( WRITE_SEISMOGRAMS_BY_MASTER .and. myrank == 0 ) ) then
call write_seismograms()
endif
@@ -109,18 +109,16 @@
if( MOVIE_SIMULATION ) then
call write_movie_output()
endif
-
+
! first step of noise tomography, i.e., save a surface movie at every time step
- if ( NOISE_TOMOGRAPHY == 1) then
- if( num_free_surface_faces == 0) then
- else
- call noise_save_surface_movie(displ, &
- ibool, &
- noise_surface_movie,it, &
- NSPEC_AB,NGLOB_AB, &
- num_free_surface_faces,free_surface_ispec,free_surface_ijk,&
- Mesh_pointer,GPU_MODE)
- endif
+ if ( NOISE_TOMOGRAPHY == 1) then
+ if( num_free_surface_faces > 0) then
+ call noise_save_surface_movie(displ,ibool, &
+ noise_surface_movie,it, &
+ NSPEC_AB,NGLOB_AB, &
+ num_free_surface_faces,free_surface_ispec,free_surface_ijk, &
+ Mesh_pointer,GPU_MODE)
+ endif
endif
!
@@ -129,7 +127,7 @@
enddo ! end of main time loop
call it_print_elapsed_time()
-
+
! Transfer fields from GPU card to host for further analysis
if(GPU_MODE) call it_transfer_from_GPU()
@@ -137,18 +135,17 @@
!=====================================================================
-
+
subroutine it_print_elapsed_time()
use specfem_par
use specfem_par_elastic
use specfem_par_acoustic
implicit none
- double precision :: tCPU,t_remain,t_total
- integer :: ihours,iminutes,iseconds,int_tCPU, &
- ihours_remain,iminutes_remain,iseconds_remain,int_t_remain, &
- ihours_total,iminutes_total,iseconds_total,int_t_total
-
+ ! local parameters
+ double precision :: tCPU
+ integer :: ihours,iminutes,iseconds,int_tCPU
+
if(myrank == 0) then
! elapsed time since beginning of the simulation
tCPU = wtime() - time_start
@@ -385,6 +382,7 @@
if( ACOUSTIC_SIMULATION ) then
if(.NOT. GPU_MODE) then
+ ! on CPU
potential_acoustic(:) = potential_acoustic(:) &
+ deltat * potential_dot_acoustic(:) &
+ deltatsqover2 * potential_dot_dot_acoustic(:)
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/noise_tomography.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/noise_tomography.f90 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/noise_tomography.f90 2012-02-25 01:37:33 UTC (rev 19680)
@@ -447,7 +447,7 @@
if (NOISE_TOMOGRAPHY/=0) then
! save/read the surface movie using the same c routine as we do for absorbing boundaries (file ID is 2)
-
+
! size of single record
reclen=CUSTOM_REAL*NDIM*NGLLSQUARE*NSPEC_TOP
@@ -459,7 +459,7 @@
print *,'reclen of noise surface_movie needed exceeds integer 4-byte limit: ',reclen
print *,' ',CUSTOM_REAL, NDIM, NGLLSQUARE, NSPEC_TOP
print*,'bit size fortran: ',bit_size(NSPEC_TOP)
- call exit_MPI(myrank,"error NSPEC_TOP integer limit")
+ call exit_MPI(myrank,"error NSPEC_TOP integer limit")
endif
! total file size
Copied: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.F90 (from rev 19670, seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90)
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.F90 (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.F90 2012-02-25 01:37:33 UTC (rev 19680)
@@ -0,0 +1,1106 @@
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+!
+! United States and French Government Sponsorship Acknowledged.
+
+ subroutine prepare_timerun()
+
+ use specfem_par
+ use specfem_par_acoustic
+ use specfem_par_elastic
+ use specfem_par_poroelastic
+ use specfem_par_movie
+
+ implicit none
+ character(len=256) :: plot_file
+ integer :: ier
+
+ ! flag for any movie simulation
+ if( EXTERNAL_MESH_MOVIE_SURFACE .or. EXTERNAL_MESH_CREATE_SHAKEMAP .or. &
+ MOVIE_SURFACE .or. CREATE_SHAKEMAP .or. MOVIE_VOLUME .or. PNM_GIF_IMAGE ) then
+ MOVIE_SIMULATION = .true.
+ else
+ MOVIE_SIMULATION = .false.
+ endif
+
+ ! user info
+ if(myrank == 0) then
+
+ write(IMAIN,*)
+ if(ATTENUATION) then
+ write(IMAIN,*) 'incorporating attenuation using ',N_SLS,' standard linear solids'
+ if(USE_OLSEN_ATTENUATION) then
+ write(IMAIN,*) 'using Olsen''s attenuation'
+ else
+ write(IMAIN,*) 'not using Olsen''s attenuation'
+ endif
+ else
+ write(IMAIN,*) 'no attenuation'
+ endif
+
+ write(IMAIN,*)
+ if(ANISOTROPY) then
+ write(IMAIN,*) 'incorporating anisotropy'
+ else
+ write(IMAIN,*) 'no anisotropy'
+ endif
+
+ write(IMAIN,*)
+ if(OCEANS) then
+ write(IMAIN,*) 'incorporating the oceans using equivalent load'
+ else
+ write(IMAIN,*) 'no oceans'
+ endif
+
+ write(IMAIN,*)
+ if(GRAVITY) then
+ write(IMAIN,*) 'incorporating gravity'
+ else
+ write(IMAIN,*) 'no gravity'
+ endif
+
+ write(IMAIN,*)
+ if(ACOUSTIC_SIMULATION) then
+ write(IMAIN,*) 'incorporating acoustic simulation'
+ else
+ write(IMAIN,*) 'no acoustic simulation'
+ endif
+
+ write(IMAIN,*)
+ if(ELASTIC_SIMULATION) then
+ write(IMAIN,*) 'incorporating elastic simulation'
+ else
+ write(IMAIN,*) 'no elastic simulation'
+ endif
+
+ write(IMAIN,*)
+ if(POROELASTIC_SIMULATION) then
+ write(IMAIN,*) 'incorporating poroelastic simulation'
+ else
+ write(IMAIN,*) 'no poroelastic simulation'
+ endif
+ write(IMAIN,*)
+
+ write(IMAIN,*)
+ if(MOVIE_SIMULATION) then
+ write(IMAIN,*) 'incorporating movie simulation'
+ else
+ write(IMAIN,*) 'no movie simulation'
+ endif
+ write(IMAIN,*)
+
+ endif
+
+ ! synchronize all the processes before assembling the mass matrix
+ ! to make sure all the nodes have finished to read their databases
+ call sync_all()
+
+ ! sets up mass matrices
+ call prepare_timerun_mass_matrices()
+
+ ! initialize acoustic arrays to zero
+ if( ACOUSTIC_SIMULATION ) then
+ potential_acoustic(:) = 0._CUSTOM_REAL
+ potential_dot_acoustic(:) = 0._CUSTOM_REAL
+ potential_dot_dot_acoustic(:) = 0._CUSTOM_REAL
+ ! put negligible initial value to avoid very slow underflow trapping
+ if(FIX_UNDERFLOW_PROBLEM) potential_acoustic(:) = VERYSMALLVAL
+ endif
+
+ ! initialize elastic arrays to zero/verysmallvall
+ if( ELASTIC_SIMULATION ) then
+ displ(:,:) = 0._CUSTOM_REAL
+ veloc(:,:) = 0._CUSTOM_REAL
+ accel(:,:) = 0._CUSTOM_REAL
+ ! put negligible initial value to avoid very slow underflow trapping
+ if(FIX_UNDERFLOW_PROBLEM) displ(:,:) = VERYSMALLVAL
+ endif
+
+
+ ! distinguish between single and double precision for reals
+ if(CUSTOM_REAL == SIZE_REAL) then
+ deltat = sngl(DT)
+ else
+ deltat = DT
+ endif
+ deltatover2 = deltat/2._CUSTOM_REAL
+ deltatsqover2 = deltat*deltat/2._CUSTOM_REAL
+
+ ! seismograms
+ if (nrec_local > 0) then
+ ! allocate seismogram array
+ allocate(seismograms_d(NDIM,nrec_local,NSTEP),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array seismograms_d'
+ allocate(seismograms_v(NDIM,nrec_local,NSTEP),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array seismograms_v'
+ allocate(seismograms_a(NDIM,nrec_local,NSTEP),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array seismograms_a'
+
+ ! initialize seismograms
+ seismograms_d(:,:,:) = 0._CUSTOM_REAL
+ seismograms_v(:,:,:) = 0._CUSTOM_REAL
+ seismograms_a(:,:,:) = 0._CUSTOM_REAL
+ endif
+
+ ! synchronize all the processes
+ call sync_all()
+
+ ! prepares attenuation arrays
+ call prepare_timerun_attenuation()
+
+ ! prepares gravity arrays
+ call prepare_timerun_gravity()
+
+ ! initializes PML arrays
+ if( ABSORBING_CONDITIONS ) then
+ if (SIMULATION_TYPE /= 1 .and. ABSORB_USE_PML ) then
+ write(IMAIN,*) 'NOTE: adjoint simulations and PML not supported yet...'
+ else
+ if( ABSORB_USE_PML ) then
+ call PML_initialize()
+ endif
+ endif
+ endif
+
+ ! opens source time function file
+ if(PRINT_SOURCE_TIME_FUNCTION .and. myrank == 0) then
+ ! print the source-time function
+ if(NSOURCES == 1) then
+ plot_file = '/plot_source_time_function.txt'
+ else
+ if(NSOURCES < 10) then
+ write(plot_file,"('/plot_source_time_function',i1,'.txt')") NSOURCES
+ else
+ write(plot_file,"('/plot_source_time_function',i2,'.txt')") NSOURCES
+ endif
+ endif
+ open(unit=IOSTF,file=trim(OUTPUT_FILES)//plot_file,status='unknown')
+ endif
+
+ ! user output
+ if(myrank == 0) then
+ write(IMAIN,*)
+ write(IMAIN,*) ' time step: ',sngl(DT),' s'
+ write(IMAIN,*) 'number of time steps: ',NSTEP
+ write(IMAIN,*) 'total simulated time: ',sngl(NSTEP*DT),' seconds'
+ write(IMAIN,*)
+ endif
+
+ ! prepares ADJOINT simulations
+ call prepare_timerun_adjoint()
+
+ ! prepares noise simulations
+ call prepare_timerun_noise()
+
+ ! prepares GPU arrays
+ if(GPU_MODE) call prepare_timerun_GPU()
+
+#ifdef OPENMP_MODE
+ ! prepares arrays for OpenMP
+ call prepare_timerun_OpenMP()
+#endif
+
+ end subroutine prepare_timerun
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine prepare_timerun_mass_matrices()
+
+ use specfem_par
+ use specfem_par_acoustic
+ use specfem_par_elastic
+ use specfem_par_poroelastic
+ implicit none
+
+! the mass matrix needs to be assembled with MPI here once and for all
+ if(ACOUSTIC_SIMULATION) then
+ call assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,rmass_acoustic, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
+ my_neighbours_ext_mesh)
+
+ ! fill mass matrix with fictitious non-zero values to make sure it can be inverted globally
+ where(rmass_acoustic <= 0._CUSTOM_REAL) rmass_acoustic = 1._CUSTOM_REAL
+ rmass_acoustic(:) = 1._CUSTOM_REAL / rmass_acoustic(:)
+
+ endif ! ACOUSTIC_SIMULATION
+
+ if(ELASTIC_SIMULATION) then
+ call assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,rmass, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ my_neighbours_ext_mesh)
+
+ ! fill mass matrix with fictitious non-zero values to make sure it can be inverted globally
+ where(rmass <= 0._CUSTOM_REAL) rmass = 1._CUSTOM_REAL
+ rmass(:) = 1._CUSTOM_REAL / rmass(:)
+
+ if(OCEANS ) then
+ if( minval(rmass_ocean_load(:)) <= 0._CUSTOM_REAL) &
+ call exit_MPI(myrank,'negative ocean load mass matrix term')
+ rmass_ocean_load(:) = 1. / rmass_ocean_load(:)
+ endif
+
+ endif ! ELASTIC_SIMULATION
+
+ if(POROELASTIC_SIMULATION) then
+
+ stop 'poroelastic simulation not implemented yet'
+ ! but would be something like this...
+ call assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,rmass_solid_poroelastic, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ my_neighbours_ext_mesh)
+
+ call assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,rmass_fluid_poroelastic, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ my_neighbours_ext_mesh)
+
+ ! fills mass matrix with fictitious non-zero values to make sure it can be inverted globally
+ where(rmass_solid_poroelastic <= 0._CUSTOM_REAL) rmass_solid_poroelastic = 1._CUSTOM_REAL
+ where(rmass_fluid_poroelastic <= 0._CUSTOM_REAL) rmass_fluid_poroelastic = 1._CUSTOM_REAL
+ rmass_solid_poroelastic(:) = 1._CUSTOM_REAL / rmass_solid_poroelastic(:)
+ rmass_fluid_poroelastic(:) = 1._CUSTOM_REAL / rmass_fluid_poroelastic(:)
+
+ endif ! POROELASTIC_SIMULATION
+
+ if(myrank == 0) write(IMAIN,*) 'end assembling MPI mass matrix'
+
+
+ end subroutine prepare_timerun_mass_matrices
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine prepare_timerun_attenuation()
+
+ use specfem_par
+ use specfem_par_acoustic
+ use specfem_par_elastic
+ use specfem_par_poroelastic
+ implicit none
+
+ ! local parameters
+ double precision, dimension(N_SLS) :: tau_sigma_dble
+ double precision :: f_c_source
+ double precision :: MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD
+ real(kind=CUSTOM_REAL):: scale_factorl
+ integer :: i,j,k,ispec,ier
+ real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: scale_factor
+
+ ! if attenuation is on, shift shear moduli to center frequency of absorption period band, i.e.
+ ! rescale mu to average (central) frequency for attenuation
+ if(ATTENUATION) then
+
+ ! initializes arrays
+ one_minus_sum_beta(:,:,:,:) = 1._CUSTOM_REAL
+ factor_common(:,:,:,:,:) = 1._CUSTOM_REAL
+
+ allocate( scale_factor(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB),stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocation scale_factor')
+ scale_factor(:,:,:,:) = 1._CUSTOM_REAL
+
+ ! reads in attenuation arrays
+ open(unit=27, file=prname(1:len_trim(prname))//'attenuation.bin', &
+ status='old',action='read',form='unformatted',iostat=ier)
+ if( ier /= 0 ) then
+ print*,'error: could not open ',prname(1:len_trim(prname))//'attenuation.bin'
+ call exit_mpi(myrank,'error opening attenuation.bin file')
+ endif
+ read(27) ispec
+ if( ispec /= NSPEC_ATTENUATION_AB ) then
+ close(27)
+ print*,'error: attenuation file array ',ispec,'should be ',NSPEC_ATTENUATION_AB
+ call exit_mpi(myrank,'error attenuation array dimensions, please recompile and rerun generate_databases')
+ endif
+ read(27) one_minus_sum_beta
+ read(27) factor_common
+ read(27) scale_factor
+ close(27)
+
+
+ ! gets stress relaxation times tau_sigma, i.e.
+ ! precalculates tau_sigma depending on period band (constant for all Q_mu), and
+ ! determines central frequency f_c_source of attenuation period band
+ call get_attenuation_constants(min_resolved_period,tau_sigma_dble,&
+ f_c_source,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
+
+ ! determines alphaval,betaval,gammaval for runge-kutta scheme
+ if(CUSTOM_REAL == SIZE_REAL) then
+ tau_sigma(:) = sngl(tau_sigma_dble(:))
+ else
+ tau_sigma(:) = tau_sigma_dble(:)
+ endif
+ call get_attenuation_memory_values(tau_sigma,deltat,alphaval,betaval,gammaval)
+
+ ! shifts shear moduli
+ do ispec = 1,NSPEC_AB
+
+ ! skips non elastic elements
+ if( ispec_is_elastic(ispec) .eqv. .false. ) cycle
+
+ ! determines attenuation factors for each GLL point
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+
+ ! scales only mu moduli
+ scale_factorl = scale_factor(i,j,k,ispec)
+ mustore(i,j,k,ispec) = mustore(i,j,k,ispec) * scale_factorl
+
+ enddo
+ enddo
+ enddo
+ enddo
+
+ deallocate(scale_factor)
+
+ ! statistics
+ ! user output
+ if( myrank == 0 ) then
+ write(IMAIN,*)
+ write(IMAIN,*) "attenuation: "
+ write(IMAIN,*) " reference period (s) : ",sngl(1.0/ATTENUATION_f0_REFERENCE), &
+ " frequency: ",sngl(ATTENUATION_f0_REFERENCE)
+ write(IMAIN,*) " period band min/max (s): ",sngl(MIN_ATTENUATION_PERIOD),sngl(MAX_ATTENUATION_PERIOD)
+ write(IMAIN,*) " central period (s) : ",sngl(1.0/f_c_source), &
+ " frequency: ",sngl(f_c_source)
+ write(IMAIN,*)
+ endif
+
+ ! clear memory variables if attenuation
+ ! initialize memory variables for attenuation
+ epsilondev_xx(:,:,:,:) = 0._CUSTOM_REAL
+ epsilondev_yy(:,:,:,:) = 0._CUSTOM_REAL
+ epsilondev_xy(:,:,:,:) = 0._CUSTOM_REAL
+ epsilondev_xz(:,:,:,:) = 0._CUSTOM_REAL
+ epsilondev_yz(:,:,:,:) = 0._CUSTOM_REAL
+
+ R_xx(:,:,:,:,:) = 0._CUSTOM_REAL
+ R_yy(:,:,:,:,:) = 0._CUSTOM_REAL
+ R_xy(:,:,:,:,:) = 0._CUSTOM_REAL
+ R_xz(:,:,:,:,:) = 0._CUSTOM_REAL
+ R_yz(:,:,:,:,:) = 0._CUSTOM_REAL
+
+ if(FIX_UNDERFLOW_PROBLEM) then
+ R_xx(:,:,:,:,:) = VERYSMALLVAL
+ R_yy(:,:,:,:,:) = VERYSMALLVAL
+ R_xy(:,:,:,:,:) = VERYSMALLVAL
+ R_xz(:,:,:,:,:) = VERYSMALLVAL
+ R_yz(:,:,:,:,:) = VERYSMALLVAL
+ endif
+ endif
+
+ end subroutine prepare_timerun_attenuation
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine prepare_timerun_gravity()
+
+! precomputes gravity factors
+
+ use specfem_par
+ use specfem_par_acoustic
+ use specfem_par_elastic
+ use specfem_par_poroelastic
+ implicit none
+
+ ! local parameters
+ double precision RICB,RCMB,RTOPDDOUBLEPRIME, &
+ R80,R220,R400,R600,R670,R771,RMOHO,RMIDDLE_CRUST,ROCEAN
+ double precision :: rspl_gravity(NR),gspl(NR),gspl2(NR)
+ double precision :: radius,g,dg ! radius_km
+ !double precision :: g_cmb_dble,g_icb_dble
+ double precision :: rho,drhodr,vp,vs,Qkappa,Qmu
+ integer :: nspl_gravity !int_radius
+ integer :: i,j,k,iglob,ier
+
+ ! sets up weights needed for integration of gravity
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+ wgll_cube(i,j,k) = sngl( wxgll(i)*wygll(j)*wzgll(k) )
+ enddo
+ enddo
+ enddo
+
+ ! store g, rho and dg/dr=dg using normalized radius in lookup table every 100 m
+ ! get density and velocity from PREM model using dummy doubling flag
+ ! this assumes that the gravity perturbations are small and smooth
+ ! and that we can neglect the 3D model and use PREM every 100 m in all cases
+ ! this is probably a rather reasonable assumption
+ if(GRAVITY) then
+
+ ! allocates gravity arrays
+ allocate( minus_deriv_gravity(NGLOB_AB), &
+ minus_g(NGLOB_AB), stat=ier)
+ if( ier /= 0 ) stop 'error allocating gravity arrays'
+
+ ! sets up spline table
+ call make_gravity(nspl_gravity,rspl_gravity,gspl,gspl2, &
+ ROCEAN,RMIDDLE_CRUST,RMOHO,R80,R220,R400,R600,R670, &
+ R771,RTOPDDOUBLEPRIME,RCMB,RICB)
+
+ ! pre-calculates gravity terms for all global points
+ do iglob = 1,NGLOB_AB
+
+ ! normalized radius ( zstore values given in m, negative values for depth)
+ radius = ( R_EARTH + zstore(iglob) ) / R_EARTH
+ call spline_evaluation(rspl_gravity,gspl,gspl2,nspl_gravity,radius,g)
+
+ ! use PREM density profile to calculate gravity (fine for other 1D models)
+ call model_prem_iso(radius,rho,drhodr,vp,vs,Qkappa,Qmu, &
+ RICB,RCMB,RTOPDDOUBLEPRIME, &
+ R600,R670,R220,R771,R400,R80,RMOHO,RMIDDLE_CRUST,ROCEAN)
+
+ dg = 4.0d0*rho - 2.0d0*g/radius
+
+ ! re-dimensionalize
+ g = g * R_EARTH*(PI*GRAV*RHOAV) ! in m / s^2 ( should be around 10 m/s^2 )
+ dg = dg * R_EARTH*(PI*GRAV*RHOAV) / R_EARTH ! gradient d/dz g , in 1/s^2
+
+ minus_deriv_gravity(iglob) = - dg
+ minus_g(iglob) = - g ! in negative z-direction
+
+ ! debug
+ !if( iglob == 1 .or. iglob == 1000 .or. iglob == 10000 ) then
+ ! ! re-dimensionalize
+ ! radius = radius * R_EARTH ! in m
+ ! vp = vp * R_EARTH*dsqrt(PI*GRAV*RHOAV) ! in m / s
+ ! rho = rho * RHOAV ! in kg / m^3
+ ! print*,'gravity: radius=',radius,'g=',g,'depth=',radius-R_EARTH
+ ! print*,'vp=',vp,'rho=',rho,'kappa=',(vp**2) * rho
+ ! print*,'minus_g..=',minus_g(iglob)
+ !endif
+ enddo
+
+ else
+
+ ! allocates dummy gravity arrays
+ allocate( minus_deriv_gravity(0), &
+ minus_g(0), stat=ier)
+ if( ier /= 0 ) stop 'error allocating gravity arrays'
+
+ endif
+
+ end subroutine prepare_timerun_gravity
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine prepare_timerun_adjoint()
+
+! prepares adjoint simulations
+
+ use specfem_par
+ use specfem_par_acoustic
+ use specfem_par_elastic
+ use specfem_par_poroelastic
+ implicit none
+ ! local parameters
+ integer :: ier
+ integer(kind=8) :: filesize
+
+! seismograms
+ if (nrec_local > 0 .and. SIMULATION_TYPE == 2 ) then
+ ! allocate Frechet derivatives array
+ allocate(Mxx_der(nrec_local),Myy_der(nrec_local), &
+ Mzz_der(nrec_local),Mxy_der(nrec_local), &
+ Mxz_der(nrec_local),Myz_der(nrec_local), &
+ sloc_der(NDIM,nrec_local),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array Mxx_der and following arrays'
+ Mxx_der = 0._CUSTOM_REAL
+ Myy_der = 0._CUSTOM_REAL
+ Mzz_der = 0._CUSTOM_REAL
+ Mxy_der = 0._CUSTOM_REAL
+ Mxz_der = 0._CUSTOM_REAL
+ Myz_der = 0._CUSTOM_REAL
+ sloc_der = 0._CUSTOM_REAL
+
+ allocate(seismograms_eps(NDIM,NDIM,nrec_local,NSTEP),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array seismograms_eps'
+ seismograms_eps(:,:,:,:) = 0._CUSTOM_REAL
+ endif
+
+! timing
+ if (SIMULATION_TYPE == 3) then
+
+ ! backward/reconstructed wavefields: time stepping is in time-reversed sense
+ ! (negative time increments)
+ if(CUSTOM_REAL == SIZE_REAL) then
+ b_deltat = - sngl(DT)
+ else
+ b_deltat = - DT
+ endif
+ b_deltatover2 = b_deltat/2._CUSTOM_REAL
+ b_deltatsqover2 = b_deltat*b_deltat/2._CUSTOM_REAL
+
+ endif
+
+! attenuation backward memories
+ if( ATTENUATION .and. SIMULATION_TYPE == 3 ) then
+
+ ! precompute Runge-Kutta coefficients if attenuation
+ call get_attenuation_memory_values(tau_sigma,b_deltat,b_alphaval,b_betaval,b_gammaval)
+
+ endif
+
+! initializes adjoint kernels and reconstructed/backward wavefields
+ if (SIMULATION_TYPE == 3) then
+ ! elastic domain
+ if( ELASTIC_SIMULATION ) then
+ rho_kl(:,:,:,:) = 0._CUSTOM_REAL
+ mu_kl(:,:,:,:) = 0._CUSTOM_REAL
+ kappa_kl(:,:,:,:) = 0._CUSTOM_REAL
+
+ if ( APPROXIMATE_HESS_KL ) &
+ hess_kl(:,:,:,:) = 0._CUSTOM_REAL
+
+ ! reconstructed/backward elastic wavefields
+ b_displ = 0._CUSTOM_REAL
+ b_veloc = 0._CUSTOM_REAL
+ b_accel = 0._CUSTOM_REAL
+ if(FIX_UNDERFLOW_PROBLEM) b_displ = VERYSMALLVAL
+
+ ! memory variables if attenuation
+ if( ATTENUATION ) then
+ b_R_xx = 0._CUSTOM_REAL
+ b_R_yy = 0._CUSTOM_REAL
+ b_R_xy = 0._CUSTOM_REAL
+ b_R_xz = 0._CUSTOM_REAL
+ b_R_yz = 0._CUSTOM_REAL
+ b_epsilondev_xx = 0._CUSTOM_REAL
+ b_epsilondev_yy = 0._CUSTOM_REAL
+ b_epsilondev_xy = 0._CUSTOM_REAL
+ b_epsilondev_xz = 0._CUSTOM_REAL
+ b_epsilondev_yz = 0._CUSTOM_REAL
+ endif
+
+ endif
+
+ ! acoustic domain
+ if( ACOUSTIC_SIMULATION ) then
+ rho_ac_kl(:,:,:,:) = 0._CUSTOM_REAL
+ kappa_ac_kl(:,:,:,:) = 0._CUSTOM_REAL
+
+ if ( APPROXIMATE_HESS_KL ) &
+ hess_ac_kl(:,:,:,:) = 0._CUSTOM_REAL
+
+ ! reconstructed/backward acoustic potentials
+ b_potential_acoustic = 0._CUSTOM_REAL
+ b_potential_dot_acoustic = 0._CUSTOM_REAL
+ b_potential_dot_dot_acoustic = 0._CUSTOM_REAL
+ if(FIX_UNDERFLOW_PROBLEM) b_potential_acoustic = VERYSMALLVAL
+
+ endif
+ endif
+
+! initialize Moho boundary index
+ if (SAVE_MOHO_MESH .and. SIMULATION_TYPE == 3) then
+ ispec2D_moho_top = 0
+ ispec2D_moho_bot = 0
+ endif
+
+! stacey absorbing fields will be reconstructed for adjoint simulations
+! using snapshot files of wavefields
+ if( ABSORBING_CONDITIONS ) then
+
+ ! opens absorbing wavefield saved/to-be-saved by forward simulations
+ if( num_abs_boundary_faces > 0 .and. (SIMULATION_TYPE == 3 .or. &
+ (SIMULATION_TYPE == 1 .and. SAVE_FORWARD)) ) then
+
+ b_num_abs_boundary_faces = num_abs_boundary_faces
+
+ ! elastic domains
+ if( ELASTIC_SIMULATION) then
+ ! allocates wavefield
+ allocate(b_absorb_field(NDIM,NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array b_absorb_field'
+
+ ! size of single record
+ b_reclen_field = CUSTOM_REAL * NDIM * NGLLSQUARE * num_abs_boundary_faces
+
+ ! check integer size limit: size of b_reclen_field must fit onto an 4-byte integer
+ if( num_abs_boundary_faces > 2147483647 / (CUSTOM_REAL * NDIM * NGLLSQUARE) ) then
+ print *,'reclen needed exceeds integer 4-byte limit: ',b_reclen_field
+ print *,' ',CUSTOM_REAL, NDIM, NGLLSQUARE, num_abs_boundary_faces
+ print*,'bit size fortran: ',bit_size(b_reclen_field)
+ call exit_MPI(myrank,"error b_reclen_field integer limit")
+ endif
+
+ ! total file size
+ filesize = b_reclen_field
+ filesize = filesize*NSTEP
+
+ if (SIMULATION_TYPE == 3) then
+ ! opens existing files
+
+ ! uses fortran routines for reading
+ !open(unit=IOABS,file=trim(prname)//'absorb_field.bin',status='old',&
+ ! action='read',form='unformatted',access='direct', &
+ ! recl=b_reclen_field+2*4,iostat=ier )
+ !if( ier /= 0 ) call exit_mpi(myrank,'error opening proc***_absorb_field.bin file')
+ ! uses c routines for faster reading
+ call open_file_abs_r(0,trim(prname)//'absorb_field.bin', &
+ len_trim(trim(prname)//'absorb_field.bin'), &
+ filesize)
+
+ else
+ ! opens new file
+ ! uses fortran routines for writing
+ !open(unit=IOABS,file=trim(prname)//'absorb_field.bin',status='unknown',&
+ ! form='unformatted',access='direct',&
+ ! recl=b_reclen_field+2*4,iostat=ier )
+ !if( ier /= 0 ) call exit_mpi(myrank,'error opening proc***_absorb_field.bin file')
+ ! uses c routines for faster writing (file index 0 for acoutic domain file)
+ call open_file_abs_w(0,trim(prname)//'absorb_field.bin', &
+ len_trim(trim(prname)//'absorb_field.bin'), &
+ filesize)
+
+ endif
+ endif
+
+ ! acoustic domains
+ if( ACOUSTIC_SIMULATION) then
+ ! allocates wavefield
+ allocate(b_absorb_potential(NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array b_absorb_potential'
+
+ ! size of single record
+ b_reclen_potential = CUSTOM_REAL * NGLLSQUARE * num_abs_boundary_faces
+
+ ! check integer size limit: size of b_reclen_potential must fit onto an 4-byte integer
+ if( num_abs_boundary_faces > 2147483647 / (CUSTOM_REAL * NGLLSQUARE) ) then
+ print *,'reclen needed exceeds integer 4-byte limit: ',b_reclen_potential
+ print *,' ',CUSTOM_REAL, NGLLSQUARE, num_abs_boundary_faces
+ print*,'bit size fortran: ',bit_size(b_reclen_potential)
+ call exit_MPI(myrank,"error b_reclen_potential integer limit")
+ endif
+
+ ! total file size (two lines to implicitly convert to 8-byte integers)
+ filesize = b_reclen_potential
+ filesize = filesize*NSTEP
+
+ ! debug check size limit
+ !if( NSTEP > 2147483647 / b_reclen_potential ) then
+ ! print *,'file size needed exceeds integer 4-byte limit: ',b_reclen_potential,NSTEP
+ ! print *,' ',CUSTOM_REAL, NGLLSQUARE, num_abs_boundary_faces,NSTEP
+ ! print*,'file size fortran: ',filesize
+ ! print*,'file bit size fortran: ',bit_size(filesize)
+ !endif
+
+ if (SIMULATION_TYPE == 3) then
+ ! opens existing files
+ ! uses fortran routines for reading
+ !open(unit=IOABS_AC,file=trim(prname)//'absorb_potential.bin',status='old',&
+ ! action='read',form='unformatted',access='direct', &
+ ! recl=b_reclen_potential+2*4,iostat=ier )
+ !if( ier /= 0 ) call exit_mpi(myrank,'error opening proc***_absorb_potential.bin file')
+
+ ! uses c routines for faster reading
+ call open_file_abs_r(1,trim(prname)//'absorb_potential.bin', &
+ len_trim(trim(prname)//'absorb_potential.bin'), &
+ filesize)
+
+ else
+ ! opens new file
+ ! uses fortran routines for writing
+ !open(unit=IOABS_AC,file=trim(prname)//'absorb_potential.bin',status='unknown',&
+ ! form='unformatted',access='direct',&
+ ! recl=b_reclen_potential+2*4,iostat=ier )
+ !if( ier /= 0 ) call exit_mpi(myrank,'error opening proc***_absorb_potential.bin file')
+ ! uses c routines for faster writing (file index 1 for acoutic domain file)
+ call open_file_abs_w(1,trim(prname)//'absorb_potential.bin', &
+ len_trim(trim(prname)//'absorb_potential.bin'), &
+ filesize)
+
+ endif
+ endif
+ else
+ ! needs dummy array
+ b_num_abs_boundary_faces = 1
+ if( ELASTIC_SIMULATION ) then
+ allocate(b_absorb_field(NDIM,NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array b_absorb_field'
+ endif
+
+ if( ACOUSTIC_SIMULATION ) then
+ allocate(b_absorb_potential(NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array b_absorb_potential'
+ endif
+ endif
+ else ! ABSORBING_CONDITIONS
+ ! needs dummy array
+ b_num_abs_boundary_faces = 1
+ if( ELASTIC_SIMULATION ) then
+ allocate(b_absorb_field(NDIM,NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array b_absorb_field'
+ endif
+
+ if( ACOUSTIC_SIMULATION ) then
+ allocate(b_absorb_potential(NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array b_absorb_potential'
+ endif
+ endif
+
+
+ end subroutine prepare_timerun_adjoint
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine prepare_timerun_noise()
+
+! prepares noise simulations
+
+ use specfem_par
+ use specfem_par_acoustic
+ use specfem_par_elastic
+ use specfem_par_poroelastic
+ use specfem_par_movie
+ implicit none
+ ! local parameters
+ integer :: ier
+
+ ! for noise simulations
+ if ( NOISE_TOMOGRAPHY /= 0 ) then
+
+ ! checks if free surface is defined
+ if( num_free_surface_faces == 0 ) then
+ write(*,*) myrank, " doesn't have a free_surface_face"
+ ! stop 'error: noise simulations need a free surface'
+ endif
+
+ ! allocates arrays
+ allocate(noise_sourcearray(NDIM,NGLLX,NGLLY,NGLLZ,NSTEP),stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating noise source array')
+
+ allocate(normal_x_noise(NGLLSQUARE*num_free_surface_faces),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array normal_x_noise'
+ allocate(normal_y_noise(NGLLSQUARE*num_free_surface_faces),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array normal_y_noise'
+ allocate(normal_z_noise(NGLLSQUARE*num_free_surface_faces),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array normal_z_noise'
+ allocate(mask_noise(NGLLSQUARE*num_free_surface_faces),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array mask_noise'
+ allocate(noise_surface_movie(NDIM,NGLLSQUARE,num_free_surface_faces),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array noise_surface_movie'
+
+ ! initializes
+ noise_sourcearray(:,:,:,:,:) = 0._CUSTOM_REAL
+ normal_x_noise(:) = 0._CUSTOM_REAL
+ normal_y_noise(:) = 0._CUSTOM_REAL
+ normal_z_noise(:) = 0._CUSTOM_REAL
+ mask_noise(:) = 0._CUSTOM_REAL
+ noise_surface_movie(:,:,:) = 0._CUSTOM_REAL
+
+ ! sets up noise source for master receiver station
+ call read_parameters_noise(myrank,nrec,NSTEP,NGLLSQUARE*num_free_surface_faces, &
+ islice_selected_rec,xi_receiver,eta_receiver,gamma_receiver,nu, &
+ noise_sourcearray,xigll,yigll,zigll, &
+ ibool, &
+ xstore,ystore,zstore, &
+ irec_master_noise,normal_x_noise,normal_y_noise,normal_z_noise,mask_noise, &
+ NSPEC_AB,NGLOB_AB, &
+ num_free_surface_faces,free_surface_ispec,free_surface_ijk, &
+ ispec_is_acoustic)
+
+ ! checks flags for noise simulation
+ call check_parameters_noise(myrank,NOISE_TOMOGRAPHY,SIMULATION_TYPE,SAVE_FORWARD, &
+ LOCAL_PATH, &
+ num_free_surface_faces,NSTEP)
+ endif
+
+ end subroutine prepare_timerun_noise
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine prepare_timerun_GPU()
+
+ use specfem_par
+ use specfem_par_acoustic
+ use specfem_par_elastic
+ use specfem_par_poroelastic
+ use specfem_par_movie
+
+ implicit none
+ real :: free_mb,used_mb,total_mb
+ integer :: ncuda_devices,ncuda_devices_min,ncuda_devices_max
+
+ ! GPU_MODE now defined in Par_file
+ if(myrank == 0 ) then
+ write(IMAIN,*)
+ write(IMAIN,*) "GPU_MODE Active. Preparing Fields and Constants on Device."
+ write(IMAIN,*)
+ endif
+
+ ! initializes GPU and outputs info to files for all processes
+ call prepare_cuda_device(myrank,ncuda_devices)
+
+ ! collects min/max of local devices found for statistics
+ call sync_all()
+ call min_all_i(ncuda_devices,ncuda_devices_min)
+ call max_all_i(ncuda_devices,ncuda_devices_max)
+
+ ! prepares general fields on GPU
+ call prepare_constants_device(Mesh_pointer, &
+ NGLLX, NSPEC_AB, NGLOB_AB, &
+ xix, xiy, xiz, etax,etay,etaz, gammax, gammay, gammaz, &
+ kappastore, mustore,ibool, &
+ num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh, ibool_interfaces_ext_mesh, &
+ hprime_xx, hprime_yy, hprime_zz, &
+ hprimewgll_xx, hprimewgll_yy, hprimewgll_zz, &
+ wgllwgll_xy, wgllwgll_xz, wgllwgll_yz, &
+ ABSORBING_CONDITIONS, &
+ abs_boundary_ispec, abs_boundary_ijk, &
+ abs_boundary_normal, &
+ abs_boundary_jacobian2Dw, &
+ num_abs_boundary_faces, &
+ ispec_is_inner, &
+ NSOURCES, nsources_local, &
+ sourcearrays, islice_selected_source, ispec_selected_source, &
+ number_receiver_global, ispec_selected_rec, &
+ nrec, nrec_local, &
+ SIMULATION_TYPE, &
+ USE_MESH_COLORING_GPU, &
+ nspec_acoustic,nspec_elastic)
+
+
+ ! prepares fields on GPU for acoustic simulations
+ if( ACOUSTIC_SIMULATION ) then
+ call prepare_fields_acoustic_device(Mesh_pointer,rmass_acoustic,rhostore,kappastore, &
+ num_phase_ispec_acoustic,phase_ispec_inner_acoustic, &
+ ispec_is_acoustic, &
+ NOISE_TOMOGRAPHY,num_free_surface_faces, &
+ free_surface_ispec,free_surface_ijk, &
+ ABSORBING_CONDITIONS,b_reclen_potential,b_absorb_potential, &
+ ELASTIC_SIMULATION, num_coupling_ac_el_faces, &
+ coupling_ac_el_ispec,coupling_ac_el_ijk, &
+ coupling_ac_el_normal,coupling_ac_el_jacobian2Dw, &
+ num_colors_outer_acoustic,num_colors_inner_acoustic, &
+ num_elem_colors_acoustic)
+
+ if( SIMULATION_TYPE == 3 ) &
+ call prepare_fields_acoustic_adj_dev(Mesh_pointer, &
+ SIMULATION_TYPE, &
+ APPROXIMATE_HESS_KL)
+
+ endif
+
+ ! prepares fields on GPU for elastic simulations
+ if( ELASTIC_SIMULATION ) then
+ call prepare_fields_elastic_device(Mesh_pointer, NDIM*NGLOB_AB, &
+ rmass,rho_vp,rho_vs, &
+ num_phase_ispec_elastic,phase_ispec_inner_elastic, &
+ ispec_is_elastic, &
+ ABSORBING_CONDITIONS,b_absorb_field,b_reclen_field, &
+ SIMULATION_TYPE,SAVE_FORWARD, &
+ COMPUTE_AND_STORE_STRAIN, &
+ epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+ epsilondev_xz,epsilondev_yz, &
+ ATTENUATION, &
+ size(R_xx), &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ one_minus_sum_beta,factor_common, &
+ alphaval,betaval,gammaval, &
+ OCEANS,rmass_ocean_load, &
+ NOISE_TOMOGRAPHY, &
+ free_surface_normal,free_surface_ispec,free_surface_ijk, &
+ num_free_surface_faces, &
+ ACOUSTIC_SIMULATION, &
+ num_colors_outer_elastic,num_colors_inner_elastic, &
+ num_elem_colors_elastic, &
+ ANISOTROPY, &
+ c11store,c12store,c13store,c14store,c15store,c16store, &
+ c22store,c23store,c24store,c25store,c26store, &
+ c33store,c34store,c35store,c36store, &
+ c44store,c45store,c46store,c55store,c56store,c66store)
+
+ if( SIMULATION_TYPE == 3 ) &
+ call prepare_fields_elastic_adj_dev(Mesh_pointer, NDIM*NGLOB_AB, &
+ SIMULATION_TYPE, &
+ COMPUTE_AND_STORE_STRAIN, &
+ epsilon_trace_over_3, &
+ b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+ b_epsilondev_xz,b_epsilondev_yz, &
+ b_epsilon_trace_over_3, &
+ ATTENUATION,size(R_xx), &
+ b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+ b_alphaval,b_betaval,b_gammaval, &
+ APPROXIMATE_HESS_KL)
+
+ endif
+
+ ! prepares needed receiver array for adjoint runs
+ if( SIMULATION_TYPE == 2 .or. SIMULATION_TYPE == 3 ) &
+ call prepare_sim2_or_3_const_device(Mesh_pointer, &
+ islice_selected_rec,size(islice_selected_rec), &
+ nadj_rec_local,nrec,myrank)
+
+ ! prepares fields on GPU for noise simulations
+ if ( NOISE_TOMOGRAPHY > 0 ) then
+ ! note: noise tomography is only supported for elastic domains so far.
+
+ ! copies noise arrays to GPU
+ call prepare_fields_noise_device(Mesh_pointer, NSPEC_AB, NGLOB_AB, &
+ free_surface_ispec, &
+ free_surface_ijk, &
+ num_free_surface_faces, &
+ SIMULATION_TYPE,NOISE_TOMOGRAPHY, &
+ NSTEP,noise_sourcearray, &
+ normal_x_noise,normal_y_noise,normal_z_noise, &
+ mask_noise,free_surface_jacobian2Dw)
+
+ endif ! NOISE_TOMOGRAPHY
+
+ ! prepares gravity arrays
+ if( GRAVITY ) then
+ call prepare_fields_gravity_device(Mesh_pointer,GRAVITY, &
+ minus_deriv_gravity,minus_g,wgll_cube,&
+ ACOUSTIC_SIMULATION,rhostore)
+ endif
+
+ ! sends initial data to device
+
+ ! puts acoustic initial fields onto GPU
+ if( ACOUSTIC_SIMULATION ) then
+ call transfer_fields_ac_to_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic,potential_dot_dot_acoustic,Mesh_pointer)
+ if( SIMULATION_TYPE == 3 ) &
+ call transfer_b_fields_ac_to_device(NGLOB_AB,b_potential_acoustic, &
+ b_potential_dot_acoustic,b_potential_dot_dot_acoustic,Mesh_pointer)
+ endif
+
+ ! puts elastic initial fields onto GPU
+ if( ELASTIC_SIMULATION ) then
+ ! transfer forward and backward fields to device with initial values
+ call transfer_fields_el_to_device(NDIM*NGLOB_AB,displ,veloc,accel,Mesh_pointer)
+ if(SIMULATION_TYPE == 3) &
+ call transfer_b_fields_to_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel,Mesh_pointer)
+ endif
+
+ ! outputs GPU usage to files for all processes
+ call output_free_device_memory(myrank)
+
+ ! outputs usage for main process
+ if( myrank == 0 ) then
+ write(IMAIN,*)" GPU number of devices per node: min =",ncuda_devices_min
+ write(IMAIN,*)" max =",ncuda_devices_max
+ write(IMAIN,*)
+
+ call get_free_device_memory(free_mb,used_mb,total_mb)
+ write(IMAIN,*)" GPU usage: free =",free_mb," MB",nint(free_mb/total_mb*100.0),"%"
+ write(IMAIN,*)" used =",used_mb," MB",nint(used_mb/total_mb*100.0),"%"
+ write(IMAIN,*)" total =",total_mb," MB",nint(total_mb/total_mb*100.0),"%"
+ write(IMAIN,*)
+ endif
+
+ end subroutine prepare_timerun_GPU
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+! OpenMP version uses "special" compute_forces_elastic_Dev routine
+! we need to set num_elem_colors_elastic arrays
+
+#ifdef OPENMP_MODE
+ subroutine prepare_timerun_OpenMP()
+
+ use specfem_par
+ use specfem_par_elastic
+ implicit none
+
+ ! local parameters
+ integer :: ier
+ integer :: NUM_THREADS
+ integer :: OMP_GET_MAX_THREADS
+
+ ! OpenMP for elastic simulation only supported yet
+ if( ELASTIC_SIMULATION ) then
+
+ NUM_THREADS = OMP_GET_MAX_THREADS()
+ if( myrank == 0 ) then
+ write(IMAIN,*)
+ write(IMAIN,*) 'Using:',NUM_THREADS, ' OpenMP threads'
+ write(IMAIN,*)
+ endif
+
+ ! allocate cfe_Dev_openmp local arrays for OpenMP version
+ allocate(dummyx_loc(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+ allocate(dummyy_loc(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+ allocate(dummyz_loc(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+ allocate(newtempx1(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+ allocate(newtempx2(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+ allocate(newtempx3(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+ allocate(newtempy1(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+ allocate(newtempy2(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+ allocate(newtempy3(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+ allocate(newtempz1(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+ allocate(newtempz2(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+ allocate(newtempz3(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+ allocate(tempx1(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+ allocate(tempx2(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+ allocate(tempx3(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+ allocate(tempy1(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+ allocate(tempy2(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+ allocate(tempy3(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+ allocate(tempz1(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+ allocate(tempz2(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+ allocate(tempz3(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+
+ ! set num_elem_colors array in case no mesh coloring is used
+ if( .not. USE_MESH_COLORING_GPU ) then
+ ! deallocate dummy array
+ if( allocated(num_elem_colors_elastic) ) deallocate(num_elem_colors_elastic)
+
+ ! loads with corresonding values
+ num_colors_outer_elastic = 1
+ num_colors_inner_elastic = 1
+ allocate(num_elem_colors_elastic(num_colors_outer_elastic + num_colors_inner_elastic),stat=ier)
+ if( ier /= 0 ) stop 'error allocating num_elem_colors_elastic array'
+
+ ! sets to all elements in inner/outer phase
+ num_elem_colors_elastic(1) = nspec_outer_elastic
+ num_elem_colors_elastic(2) = nspec_inner_elastic
+ endif
+
+ endif
+
+ end subroutine prepare_timerun_OpenMP
+#endif
Deleted: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90 2012-02-25 01:37:33 UTC (rev 19680)
@@ -1,1028 +0,0 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-!
-! United States and French Government Sponsorship Acknowledged.
-
- subroutine prepare_timerun()
-
- use specfem_par
- use specfem_par_acoustic
- use specfem_par_elastic
- use specfem_par_poroelastic
- use specfem_par_movie
-
- implicit none
- character(len=256) :: plot_file
- integer :: ier
-
- ! flag for any movie simulation
- if( EXTERNAL_MESH_MOVIE_SURFACE .or. EXTERNAL_MESH_CREATE_SHAKEMAP .or. &
- MOVIE_SURFACE .or. CREATE_SHAKEMAP .or. MOVIE_VOLUME .or. PNM_GIF_IMAGE ) then
- MOVIE_SIMULATION = .true.
- else
- MOVIE_SIMULATION = .false.
- endif
-
- ! user info
- if(myrank == 0) then
-
- write(IMAIN,*)
- if(ATTENUATION) then
- write(IMAIN,*) 'incorporating attenuation using ',N_SLS,' standard linear solids'
- if(USE_OLSEN_ATTENUATION) then
- write(IMAIN,*) 'using Olsen''s attenuation'
- else
- write(IMAIN,*) 'not using Olsen''s attenuation'
- endif
- else
- write(IMAIN,*) 'no attenuation'
- endif
-
- write(IMAIN,*)
- if(ANISOTROPY) then
- write(IMAIN,*) 'incorporating anisotropy'
- else
- write(IMAIN,*) 'no anisotropy'
- endif
-
- write(IMAIN,*)
- if(OCEANS) then
- write(IMAIN,*) 'incorporating the oceans using equivalent load'
- else
- write(IMAIN,*) 'no oceans'
- endif
-
- write(IMAIN,*)
- if(GRAVITY) then
- write(IMAIN,*) 'incorporating gravity'
- else
- write(IMAIN,*) 'no gravity'
- endif
-
- write(IMAIN,*)
- if(ACOUSTIC_SIMULATION) then
- write(IMAIN,*) 'incorporating acoustic simulation'
- else
- write(IMAIN,*) 'no acoustic simulation'
- endif
-
- write(IMAIN,*)
- if(ELASTIC_SIMULATION) then
- write(IMAIN,*) 'incorporating elastic simulation'
- else
- write(IMAIN,*) 'no elastic simulation'
- endif
-
- write(IMAIN,*)
- if(POROELASTIC_SIMULATION) then
- write(IMAIN,*) 'incorporating poroelastic simulation'
- else
- write(IMAIN,*) 'no poroelastic simulation'
- endif
- write(IMAIN,*)
-
- write(IMAIN,*)
- if(MOVIE_SIMULATION) then
- write(IMAIN,*) 'incorporating movie simulation'
- else
- write(IMAIN,*) 'no movie simulation'
- endif
- write(IMAIN,*)
-
- endif
-
- ! synchronize all the processes before assembling the mass matrix
- ! to make sure all the nodes have finished to read their databases
- call sync_all()
-
- ! sets up mass matrices
- call prepare_timerun_mass_matrices()
-
- ! initialize acoustic arrays to zero
- if( ACOUSTIC_SIMULATION ) then
- potential_acoustic(:) = 0._CUSTOM_REAL
- potential_dot_acoustic(:) = 0._CUSTOM_REAL
- potential_dot_dot_acoustic(:) = 0._CUSTOM_REAL
- ! put negligible initial value to avoid very slow underflow trapping
- if(FIX_UNDERFLOW_PROBLEM) potential_acoustic(:) = VERYSMALLVAL
- endif
-
- ! initialize elastic arrays to zero/verysmallvall
- if( ELASTIC_SIMULATION ) then
- displ(:,:) = 0._CUSTOM_REAL
- veloc(:,:) = 0._CUSTOM_REAL
- accel(:,:) = 0._CUSTOM_REAL
- ! put negligible initial value to avoid very slow underflow trapping
- if(FIX_UNDERFLOW_PROBLEM) displ(:,:) = VERYSMALLVAL
- endif
-
-
- ! distinguish between single and double precision for reals
- if(CUSTOM_REAL == SIZE_REAL) then
- deltat = sngl(DT)
- else
- deltat = DT
- endif
- deltatover2 = deltat/2._CUSTOM_REAL
- deltatsqover2 = deltat*deltat/2._CUSTOM_REAL
-
- ! seismograms
- if (nrec_local > 0) then
- ! allocate seismogram array
- allocate(seismograms_d(NDIM,nrec_local,NSTEP),stat=ier)
- if( ier /= 0 ) stop 'error allocating array seismograms_d'
- allocate(seismograms_v(NDIM,nrec_local,NSTEP),stat=ier)
- if( ier /= 0 ) stop 'error allocating array seismograms_v'
- allocate(seismograms_a(NDIM,nrec_local,NSTEP),stat=ier)
- if( ier /= 0 ) stop 'error allocating array seismograms_a'
-
- ! initialize seismograms
- seismograms_d(:,:,:) = 0._CUSTOM_REAL
- seismograms_v(:,:,:) = 0._CUSTOM_REAL
- seismograms_a(:,:,:) = 0._CUSTOM_REAL
- endif
-
- ! synchronize all the processes
- call sync_all()
-
- ! prepares attenuation arrays
- call prepare_timerun_attenuation()
-
- ! prepares gravity arrays
- call prepare_timerun_gravity()
-
- ! initializes PML arrays
- if( ABSORBING_CONDITIONS ) then
- if (SIMULATION_TYPE /= 1 .and. ABSORB_USE_PML ) then
- write(IMAIN,*) 'NOTE: adjoint simulations and PML not supported yet...'
- else
- if( ABSORB_USE_PML ) then
- call PML_initialize()
- endif
- endif
- endif
-
- ! opens source time function file
- if(PRINT_SOURCE_TIME_FUNCTION .and. myrank == 0) then
- ! print the source-time function
- if(NSOURCES == 1) then
- plot_file = '/plot_source_time_function.txt'
- else
- if(NSOURCES < 10) then
- write(plot_file,"('/plot_source_time_function',i1,'.txt')") NSOURCES
- else
- write(plot_file,"('/plot_source_time_function',i2,'.txt')") NSOURCES
- endif
- endif
- open(unit=IOSTF,file=trim(OUTPUT_FILES)//plot_file,status='unknown')
- endif
-
- ! user output
- if(myrank == 0) then
- write(IMAIN,*)
- write(IMAIN,*) ' time step: ',sngl(DT),' s'
- write(IMAIN,*) 'number of time steps: ',NSTEP
- write(IMAIN,*) 'total simulated time: ',sngl(NSTEP*DT),' seconds'
- write(IMAIN,*)
- endif
-
- ! prepares ADJOINT simulations
- call prepare_timerun_adjoint()
-
- ! prepares noise simulations
- call prepare_timerun_noise()
-
- ! prepares GPU arrays
- if(GPU_MODE) call prepare_timerun_GPU()
-
- end subroutine prepare_timerun
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine prepare_timerun_mass_matrices()
-
- use specfem_par
- use specfem_par_acoustic
- use specfem_par_elastic
- use specfem_par_poroelastic
- implicit none
-
-! the mass matrix needs to be assembled with MPI here once and for all
- if(ACOUSTIC_SIMULATION) then
- call assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,rmass_acoustic, &
- num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
- my_neighbours_ext_mesh)
-
- ! fill mass matrix with fictitious non-zero values to make sure it can be inverted globally
- where(rmass_acoustic <= 0._CUSTOM_REAL) rmass_acoustic = 1._CUSTOM_REAL
- rmass_acoustic(:) = 1._CUSTOM_REAL / rmass_acoustic(:)
-
- endif ! ACOUSTIC_SIMULATION
-
- if(ELASTIC_SIMULATION) then
- call assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,rmass, &
- num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
- my_neighbours_ext_mesh)
-
- ! fill mass matrix with fictitious non-zero values to make sure it can be inverted globally
- where(rmass <= 0._CUSTOM_REAL) rmass = 1._CUSTOM_REAL
- rmass(:) = 1._CUSTOM_REAL / rmass(:)
-
- if(OCEANS ) then
- if( minval(rmass_ocean_load(:)) <= 0._CUSTOM_REAL) &
- call exit_MPI(myrank,'negative ocean load mass matrix term')
- rmass_ocean_load(:) = 1. / rmass_ocean_load(:)
- endif
-
- endif ! ELASTIC_SIMULATION
-
- if(POROELASTIC_SIMULATION) then
-
- stop 'poroelastic simulation not implemented yet'
- ! but would be something like this...
- call assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,rmass_solid_poroelastic, &
- num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
- my_neighbours_ext_mesh)
-
- call assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,rmass_fluid_poroelastic, &
- num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
- my_neighbours_ext_mesh)
-
- ! fills mass matrix with fictitious non-zero values to make sure it can be inverted globally
- where(rmass_solid_poroelastic <= 0._CUSTOM_REAL) rmass_solid_poroelastic = 1._CUSTOM_REAL
- where(rmass_fluid_poroelastic <= 0._CUSTOM_REAL) rmass_fluid_poroelastic = 1._CUSTOM_REAL
- rmass_solid_poroelastic(:) = 1._CUSTOM_REAL / rmass_solid_poroelastic(:)
- rmass_fluid_poroelastic(:) = 1._CUSTOM_REAL / rmass_fluid_poroelastic(:)
-
- endif ! POROELASTIC_SIMULATION
-
- if(myrank == 0) write(IMAIN,*) 'end assembling MPI mass matrix'
-
-
- end subroutine prepare_timerun_mass_matrices
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine prepare_timerun_attenuation()
-
- use specfem_par
- use specfem_par_acoustic
- use specfem_par_elastic
- use specfem_par_poroelastic
- implicit none
-
- ! local parameters
- double precision, dimension(N_SLS) :: tau_sigma_dble
- double precision :: f_c_source
- double precision :: MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD
- real(kind=CUSTOM_REAL):: scale_factorl
- integer :: i,j,k,ispec,ier
- real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: scale_factor
-
- ! if attenuation is on, shift shear moduli to center frequency of absorption period band, i.e.
- ! rescale mu to average (central) frequency for attenuation
- if(ATTENUATION) then
-
- ! initializes arrays
- one_minus_sum_beta(:,:,:,:) = 1._CUSTOM_REAL
- factor_common(:,:,:,:,:) = 1._CUSTOM_REAL
-
- allocate( scale_factor(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB),stat=ier)
- if( ier /= 0 ) call exit_mpi(myrank,'error allocation scale_factor')
- scale_factor(:,:,:,:) = 1._CUSTOM_REAL
-
- ! reads in attenuation arrays
- open(unit=27, file=prname(1:len_trim(prname))//'attenuation.bin', &
- status='old',action='read',form='unformatted',iostat=ier)
- if( ier /= 0 ) then
- print*,'error: could not open ',prname(1:len_trim(prname))//'attenuation.bin'
- call exit_mpi(myrank,'error opening attenuation.bin file')
- endif
- read(27) ispec
- if( ispec /= NSPEC_ATTENUATION_AB ) then
- close(27)
- print*,'error: attenuation file array ',ispec,'should be ',NSPEC_ATTENUATION_AB
- call exit_mpi(myrank,'error attenuation array dimensions, please recompile and rerun generate_databases')
- endif
- read(27) one_minus_sum_beta
- read(27) factor_common
- read(27) scale_factor
- close(27)
-
-
- ! gets stress relaxation times tau_sigma, i.e.
- ! precalculates tau_sigma depending on period band (constant for all Q_mu), and
- ! determines central frequency f_c_source of attenuation period band
- call get_attenuation_constants(min_resolved_period,tau_sigma_dble,&
- f_c_source,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
-
- ! determines alphaval,betaval,gammaval for runge-kutta scheme
- if(CUSTOM_REAL == SIZE_REAL) then
- tau_sigma(:) = sngl(tau_sigma_dble(:))
- else
- tau_sigma(:) = tau_sigma_dble(:)
- endif
- call get_attenuation_memory_values(tau_sigma,deltat,alphaval,betaval,gammaval)
-
- ! shifts shear moduli
- do ispec = 1,NSPEC_AB
-
- ! skips non elastic elements
- if( ispec_is_elastic(ispec) .eqv. .false. ) cycle
-
- ! determines attenuation factors for each GLL point
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
-
- ! scales only mu moduli
- scale_factorl = scale_factor(i,j,k,ispec)
- mustore(i,j,k,ispec) = mustore(i,j,k,ispec) * scale_factorl
-
- enddo
- enddo
- enddo
- enddo
-
- deallocate(scale_factor)
-
- ! statistics
- ! user output
- if( myrank == 0 ) then
- write(IMAIN,*)
- write(IMAIN,*) "attenuation: "
- write(IMAIN,*) " reference period (s) : ",sngl(1.0/ATTENUATION_f0_REFERENCE), &
- " frequency: ",sngl(ATTENUATION_f0_REFERENCE)
- write(IMAIN,*) " period band min/max (s): ",sngl(MIN_ATTENUATION_PERIOD),sngl(MAX_ATTENUATION_PERIOD)
- write(IMAIN,*) " central period (s) : ",sngl(1.0/f_c_source), &
- " frequency: ",sngl(f_c_source)
- write(IMAIN,*)
- endif
-
- ! clear memory variables if attenuation
- ! initialize memory variables for attenuation
- epsilondev_xx(:,:,:,:) = 0._CUSTOM_REAL
- epsilondev_yy(:,:,:,:) = 0._CUSTOM_REAL
- epsilondev_xy(:,:,:,:) = 0._CUSTOM_REAL
- epsilondev_xz(:,:,:,:) = 0._CUSTOM_REAL
- epsilondev_yz(:,:,:,:) = 0._CUSTOM_REAL
-
- R_xx(:,:,:,:,:) = 0._CUSTOM_REAL
- R_yy(:,:,:,:,:) = 0._CUSTOM_REAL
- R_xy(:,:,:,:,:) = 0._CUSTOM_REAL
- R_xz(:,:,:,:,:) = 0._CUSTOM_REAL
- R_yz(:,:,:,:,:) = 0._CUSTOM_REAL
-
- if(FIX_UNDERFLOW_PROBLEM) then
- R_xx(:,:,:,:,:) = VERYSMALLVAL
- R_yy(:,:,:,:,:) = VERYSMALLVAL
- R_xy(:,:,:,:,:) = VERYSMALLVAL
- R_xz(:,:,:,:,:) = VERYSMALLVAL
- R_yz(:,:,:,:,:) = VERYSMALLVAL
- endif
- endif
-
- end subroutine prepare_timerun_attenuation
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine prepare_timerun_gravity()
-
-! precomputes gravity factors
-
- use specfem_par
- use specfem_par_acoustic
- use specfem_par_elastic
- use specfem_par_poroelastic
- implicit none
-
- ! local parameters
- double precision RICB,RCMB,RTOPDDOUBLEPRIME, &
- R80,R220,R400,R600,R670,R771,RMOHO,RMIDDLE_CRUST,ROCEAN
- double precision :: rspl_gravity(NR),gspl(NR),gspl2(NR)
- double precision :: radius,g,dg ! radius_km
- !double precision :: g_cmb_dble,g_icb_dble
- double precision :: rho,drhodr,vp,vs,Qkappa,Qmu
- integer :: nspl_gravity !int_radius
- integer :: i,j,k,iglob,ier
-
- ! sets up weights needed for integration of gravity
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- wgll_cube(i,j,k) = sngl( wxgll(i)*wygll(j)*wzgll(k) )
- enddo
- enddo
- enddo
-
- ! store g, rho and dg/dr=dg using normalized radius in lookup table every 100 m
- ! get density and velocity from PREM model using dummy doubling flag
- ! this assumes that the gravity perturbations are small and smooth
- ! and that we can neglect the 3D model and use PREM every 100 m in all cases
- ! this is probably a rather reasonable assumption
- if(GRAVITY) then
-
- ! allocates gravity arrays
- allocate( minus_deriv_gravity(NGLOB_AB), &
- minus_g(NGLOB_AB), stat=ier)
- if( ier /= 0 ) stop 'error allocating gravity arrays'
-
- ! sets up spline table
- call make_gravity(nspl_gravity,rspl_gravity,gspl,gspl2, &
- ROCEAN,RMIDDLE_CRUST,RMOHO,R80,R220,R400,R600,R670, &
- R771,RTOPDDOUBLEPRIME,RCMB,RICB)
-
- ! pre-calculates gravity terms for all global points
- do iglob = 1,NGLOB_AB
-
- ! normalized radius ( zstore values given in m, negative values for depth)
- radius = ( R_EARTH + zstore(iglob) ) / R_EARTH
- call spline_evaluation(rspl_gravity,gspl,gspl2,nspl_gravity,radius,g)
-
- ! use PREM density profile to calculate gravity (fine for other 1D models)
- call model_prem_iso(radius,rho,drhodr,vp,vs,Qkappa,Qmu, &
- RICB,RCMB,RTOPDDOUBLEPRIME, &
- R600,R670,R220,R771,R400,R80,RMOHO,RMIDDLE_CRUST,ROCEAN)
-
- dg = 4.0d0*rho - 2.0d0*g/radius
-
- ! re-dimensionalize
- g = g * R_EARTH*(PI*GRAV*RHOAV) ! in m / s^2 ( should be around 10 m/s^2 )
- dg = dg * R_EARTH*(PI*GRAV*RHOAV) / R_EARTH ! gradient d/dz g , in 1/s^2
-
- minus_deriv_gravity(iglob) = - dg
- minus_g(iglob) = - g ! in negative z-direction
-
- ! debug
- !if( iglob == 1 .or. iglob == 1000 .or. iglob == 10000 ) then
- ! ! re-dimensionalize
- ! radius = radius * R_EARTH ! in m
- ! vp = vp * R_EARTH*dsqrt(PI*GRAV*RHOAV) ! in m / s
- ! rho = rho * RHOAV ! in kg / m^3
- ! print*,'gravity: radius=',radius,'g=',g,'depth=',radius-R_EARTH
- ! print*,'vp=',vp,'rho=',rho,'kappa=',(vp**2) * rho
- ! print*,'minus_g..=',minus_g(iglob)
- !endif
- enddo
-
- else
-
- ! allocates dummy gravity arrays
- allocate( minus_deriv_gravity(0), &
- minus_g(0), stat=ier)
- if( ier /= 0 ) stop 'error allocating gravity arrays'
-
- endif
-
- end subroutine prepare_timerun_gravity
-
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine prepare_timerun_adjoint()
-
-! prepares adjoint simulations
-
- use specfem_par
- use specfem_par_acoustic
- use specfem_par_elastic
- use specfem_par_poroelastic
- implicit none
- ! local parameters
- integer :: ier
- integer(kind=8) :: filesize
-
-! seismograms
- if (nrec_local > 0 .and. SIMULATION_TYPE == 2 ) then
- ! allocate Frechet derivatives array
- allocate(Mxx_der(nrec_local),Myy_der(nrec_local), &
- Mzz_der(nrec_local),Mxy_der(nrec_local), &
- Mxz_der(nrec_local),Myz_der(nrec_local), &
- sloc_der(NDIM,nrec_local),stat=ier)
- if( ier /= 0 ) stop 'error allocating array Mxx_der and following arrays'
- Mxx_der = 0._CUSTOM_REAL
- Myy_der = 0._CUSTOM_REAL
- Mzz_der = 0._CUSTOM_REAL
- Mxy_der = 0._CUSTOM_REAL
- Mxz_der = 0._CUSTOM_REAL
- Myz_der = 0._CUSTOM_REAL
- sloc_der = 0._CUSTOM_REAL
-
- allocate(seismograms_eps(NDIM,NDIM,nrec_local,NSTEP),stat=ier)
- if( ier /= 0 ) stop 'error allocating array seismograms_eps'
- seismograms_eps(:,:,:,:) = 0._CUSTOM_REAL
- endif
-
-! timing
- if (SIMULATION_TYPE == 3) then
-
- ! backward/reconstructed wavefields: time stepping is in time-reversed sense
- ! (negative time increments)
- if(CUSTOM_REAL == SIZE_REAL) then
- b_deltat = - sngl(DT)
- else
- b_deltat = - DT
- endif
- b_deltatover2 = b_deltat/2._CUSTOM_REAL
- b_deltatsqover2 = b_deltat*b_deltat/2._CUSTOM_REAL
-
- endif
-
-! attenuation backward memories
- if( ATTENUATION .and. SIMULATION_TYPE == 3 ) then
-
- ! precompute Runge-Kutta coefficients if attenuation
- call get_attenuation_memory_values(tau_sigma,b_deltat,b_alphaval,b_betaval,b_gammaval)
-
- endif
-
-! initializes adjoint kernels and reconstructed/backward wavefields
- if (SIMULATION_TYPE == 3) then
- ! elastic domain
- if( ELASTIC_SIMULATION ) then
- rho_kl(:,:,:,:) = 0._CUSTOM_REAL
- mu_kl(:,:,:,:) = 0._CUSTOM_REAL
- kappa_kl(:,:,:,:) = 0._CUSTOM_REAL
-
- if ( APPROXIMATE_HESS_KL ) &
- hess_kl(:,:,:,:) = 0._CUSTOM_REAL
-
- ! reconstructed/backward elastic wavefields
- b_displ = 0._CUSTOM_REAL
- b_veloc = 0._CUSTOM_REAL
- b_accel = 0._CUSTOM_REAL
- if(FIX_UNDERFLOW_PROBLEM) b_displ = VERYSMALLVAL
-
- ! memory variables if attenuation
- if( ATTENUATION ) then
- b_R_xx = 0._CUSTOM_REAL
- b_R_yy = 0._CUSTOM_REAL
- b_R_xy = 0._CUSTOM_REAL
- b_R_xz = 0._CUSTOM_REAL
- b_R_yz = 0._CUSTOM_REAL
- b_epsilondev_xx = 0._CUSTOM_REAL
- b_epsilondev_yy = 0._CUSTOM_REAL
- b_epsilondev_xy = 0._CUSTOM_REAL
- b_epsilondev_xz = 0._CUSTOM_REAL
- b_epsilondev_yz = 0._CUSTOM_REAL
- endif
-
- endif
-
- ! acoustic domain
- if( ACOUSTIC_SIMULATION ) then
- rho_ac_kl(:,:,:,:) = 0._CUSTOM_REAL
- kappa_ac_kl(:,:,:,:) = 0._CUSTOM_REAL
-
- if ( APPROXIMATE_HESS_KL ) &
- hess_ac_kl(:,:,:,:) = 0._CUSTOM_REAL
-
- ! reconstructed/backward acoustic potentials
- b_potential_acoustic = 0._CUSTOM_REAL
- b_potential_dot_acoustic = 0._CUSTOM_REAL
- b_potential_dot_dot_acoustic = 0._CUSTOM_REAL
- if(FIX_UNDERFLOW_PROBLEM) b_potential_acoustic = VERYSMALLVAL
-
- endif
- endif
-
-! initialize Moho boundary index
- if (SAVE_MOHO_MESH .and. SIMULATION_TYPE == 3) then
- ispec2D_moho_top = 0
- ispec2D_moho_bot = 0
- endif
-
-! stacey absorbing fields will be reconstructed for adjoint simulations
-! using snapshot files of wavefields
- if( ABSORBING_CONDITIONS ) then
-
- ! opens absorbing wavefield saved/to-be-saved by forward simulations
- if( num_abs_boundary_faces > 0 .and. (SIMULATION_TYPE == 3 .or. &
- (SIMULATION_TYPE == 1 .and. SAVE_FORWARD)) ) then
-
- b_num_abs_boundary_faces = num_abs_boundary_faces
-
- ! elastic domains
- if( ELASTIC_SIMULATION) then
- ! allocates wavefield
- allocate(b_absorb_field(NDIM,NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
- if( ier /= 0 ) stop 'error allocating array b_absorb_field'
-
- ! size of single record
- b_reclen_field = CUSTOM_REAL * NDIM * NGLLSQUARE * num_abs_boundary_faces
-
- ! check integer size limit: size of b_reclen_field must fit onto an 4-byte integer
- if( num_abs_boundary_faces > 2147483647 / (CUSTOM_REAL * NDIM * NGLLSQUARE) ) then
- print *,'reclen needed exceeds integer 4-byte limit: ',b_reclen_field
- print *,' ',CUSTOM_REAL, NDIM, NGLLSQUARE, num_abs_boundary_faces
- print*,'bit size fortran: ',bit_size(b_reclen_field)
- call exit_MPI(myrank,"error b_reclen_field integer limit")
- endif
-
- ! total file size
- filesize = b_reclen_field
- filesize = filesize*NSTEP
-
- if (SIMULATION_TYPE == 3) then
- ! opens existing files
-
- ! uses fortran routines for reading
- !open(unit=IOABS,file=trim(prname)//'absorb_field.bin',status='old',&
- ! action='read',form='unformatted',access='direct', &
- ! recl=b_reclen_field+2*4,iostat=ier )
- !if( ier /= 0 ) call exit_mpi(myrank,'error opening proc***_absorb_field.bin file')
- ! uses c routines for faster reading
- call open_file_abs_r(0,trim(prname)//'absorb_field.bin', &
- len_trim(trim(prname)//'absorb_field.bin'), &
- filesize)
-
- else
- ! opens new file
- ! uses fortran routines for writing
- !open(unit=IOABS,file=trim(prname)//'absorb_field.bin',status='unknown',&
- ! form='unformatted',access='direct',&
- ! recl=b_reclen_field+2*4,iostat=ier )
- !if( ier /= 0 ) call exit_mpi(myrank,'error opening proc***_absorb_field.bin file')
- ! uses c routines for faster writing (file index 0 for acoutic domain file)
- call open_file_abs_w(0,trim(prname)//'absorb_field.bin', &
- len_trim(trim(prname)//'absorb_field.bin'), &
- filesize)
-
- endif
- endif
-
- ! acoustic domains
- if( ACOUSTIC_SIMULATION) then
- ! allocates wavefield
- allocate(b_absorb_potential(NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
- if( ier /= 0 ) stop 'error allocating array b_absorb_potential'
-
- ! size of single record
- b_reclen_potential = CUSTOM_REAL * NGLLSQUARE * num_abs_boundary_faces
-
- ! check integer size limit: size of b_reclen_potential must fit onto an 4-byte integer
- if( num_abs_boundary_faces > 2147483647 / (CUSTOM_REAL * NGLLSQUARE) ) then
- print *,'reclen needed exceeds integer 4-byte limit: ',b_reclen_potential
- print *,' ',CUSTOM_REAL, NGLLSQUARE, num_abs_boundary_faces
- print*,'bit size fortran: ',bit_size(b_reclen_potential)
- call exit_MPI(myrank,"error b_reclen_potential integer limit")
- endif
-
- ! total file size (two lines to implicitly convert to 8-byte integers)
- filesize = b_reclen_potential
- filesize = filesize*NSTEP
-
- ! debug check size limit
- !if( NSTEP > 2147483647 / b_reclen_potential ) then
- ! print *,'file size needed exceeds integer 4-byte limit: ',b_reclen_potential,NSTEP
- ! print *,' ',CUSTOM_REAL, NGLLSQUARE, num_abs_boundary_faces,NSTEP
- ! print*,'file size fortran: ',filesize
- ! print*,'file bit size fortran: ',bit_size(filesize)
- !endif
-
- if (SIMULATION_TYPE == 3) then
- ! opens existing files
- ! uses fortran routines for reading
- !open(unit=IOABS_AC,file=trim(prname)//'absorb_potential.bin',status='old',&
- ! action='read',form='unformatted',access='direct', &
- ! recl=b_reclen_potential+2*4,iostat=ier )
- !if( ier /= 0 ) call exit_mpi(myrank,'error opening proc***_absorb_potential.bin file')
-
- ! uses c routines for faster reading
- call open_file_abs_r(1,trim(prname)//'absorb_potential.bin', &
- len_trim(trim(prname)//'absorb_potential.bin'), &
- filesize)
-
- else
- ! opens new file
- ! uses fortran routines for writing
- !open(unit=IOABS_AC,file=trim(prname)//'absorb_potential.bin',status='unknown',&
- ! form='unformatted',access='direct',&
- ! recl=b_reclen_potential+2*4,iostat=ier )
- !if( ier /= 0 ) call exit_mpi(myrank,'error opening proc***_absorb_potential.bin file')
- ! uses c routines for faster writing (file index 1 for acoutic domain file)
- call open_file_abs_w(1,trim(prname)//'absorb_potential.bin', &
- len_trim(trim(prname)//'absorb_potential.bin'), &
- filesize)
-
- endif
- endif
- else
- ! needs dummy array
- b_num_abs_boundary_faces = 1
- if( ELASTIC_SIMULATION ) then
- allocate(b_absorb_field(NDIM,NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
- if( ier /= 0 ) stop 'error allocating array b_absorb_field'
- endif
-
- if( ACOUSTIC_SIMULATION ) then
- allocate(b_absorb_potential(NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
- if( ier /= 0 ) stop 'error allocating array b_absorb_potential'
- endif
- endif
- else ! ABSORBING_CONDITIONS
- ! needs dummy array
- b_num_abs_boundary_faces = 1
- if( ELASTIC_SIMULATION ) then
- allocate(b_absorb_field(NDIM,NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
- if( ier /= 0 ) stop 'error allocating array b_absorb_field'
- endif
-
- if( ACOUSTIC_SIMULATION ) then
- allocate(b_absorb_potential(NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
- if( ier /= 0 ) stop 'error allocating array b_absorb_potential'
- endif
- endif
-
-
- end subroutine prepare_timerun_adjoint
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine prepare_timerun_noise()
-
-! prepares noise simulations
-
- use specfem_par
- use specfem_par_acoustic
- use specfem_par_elastic
- use specfem_par_poroelastic
- use specfem_par_movie
- implicit none
- ! local parameters
- integer :: ier
-
- ! for noise simulations
- if ( NOISE_TOMOGRAPHY /= 0 ) then
-
- ! checks if free surface is defined
- if( num_free_surface_faces == 0 ) then
- write(*,*) myrank, " doesn't have a free_surface_face"
- ! stop 'error: noise simulations need a free surface'
- endif
-
- ! allocates arrays
- allocate(noise_sourcearray(NDIM,NGLLX,NGLLY,NGLLZ,NSTEP),stat=ier)
- if( ier /= 0 ) call exit_mpi(myrank,'error allocating noise source array')
-
- allocate(normal_x_noise(NGLLSQUARE*num_free_surface_faces),stat=ier)
- if( ier /= 0 ) stop 'error allocating array normal_x_noise'
- allocate(normal_y_noise(NGLLSQUARE*num_free_surface_faces),stat=ier)
- if( ier /= 0 ) stop 'error allocating array normal_y_noise'
- allocate(normal_z_noise(NGLLSQUARE*num_free_surface_faces),stat=ier)
- if( ier /= 0 ) stop 'error allocating array normal_z_noise'
- allocate(mask_noise(NGLLSQUARE*num_free_surface_faces),stat=ier)
- if( ier /= 0 ) stop 'error allocating array mask_noise'
- allocate(noise_surface_movie(NDIM,NGLLSQUARE,num_free_surface_faces),stat=ier)
- if( ier /= 0 ) stop 'error allocating array noise_surface_movie'
-
- ! initializes
- noise_sourcearray(:,:,:,:,:) = 0._CUSTOM_REAL
- normal_x_noise(:) = 0._CUSTOM_REAL
- normal_y_noise(:) = 0._CUSTOM_REAL
- normal_z_noise(:) = 0._CUSTOM_REAL
- mask_noise(:) = 0._CUSTOM_REAL
- noise_surface_movie(:,:,:) = 0._CUSTOM_REAL
-
- ! sets up noise source for master receiver station
- call read_parameters_noise(myrank,nrec,NSTEP,NGLLSQUARE*num_free_surface_faces, &
- islice_selected_rec,xi_receiver,eta_receiver,gamma_receiver,nu, &
- noise_sourcearray,xigll,yigll,zigll, &
- ibool, &
- xstore,ystore,zstore, &
- irec_master_noise,normal_x_noise,normal_y_noise,normal_z_noise,mask_noise, &
- NSPEC_AB,NGLOB_AB, &
- num_free_surface_faces,free_surface_ispec,free_surface_ijk, &
- ispec_is_acoustic)
-
- ! checks flags for noise simulation
- call check_parameters_noise(myrank,NOISE_TOMOGRAPHY,SIMULATION_TYPE,SAVE_FORWARD, &
- LOCAL_PATH, &
- num_free_surface_faces,NSTEP)
- endif
-
- end subroutine prepare_timerun_noise
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine prepare_timerun_GPU()
-
- use specfem_par
- use specfem_par_acoustic
- use specfem_par_elastic
- use specfem_par_poroelastic
- use specfem_par_movie
-
- implicit none
- real :: free_mb,used_mb,total_mb
- integer :: ncuda_devices,ncuda_devices_min,ncuda_devices_max
-
- ! GPU_MODE now defined in Par_file
- if(myrank == 0 ) then
- write(IMAIN,*)
- write(IMAIN,*) "GPU_MODE Active. Preparing Fields and Constants on Device."
- write(IMAIN,*)
- endif
-
- ! initializes GPU and outputs info to files for all processes
- call prepare_cuda_device(myrank,ncuda_devices)
-
- ! collects min/max of local devices found for statistics
- call sync_all()
- call min_all_i(ncuda_devices,ncuda_devices_min)
- call max_all_i(ncuda_devices,ncuda_devices_max)
-
- ! prepares general fields on GPU
- call prepare_constants_device(Mesh_pointer, &
- NGLLX, NSPEC_AB, NGLOB_AB, &
- xix, xiy, xiz, etax,etay,etaz, gammax, gammay, gammaz, &
- kappastore, mustore,ibool, &
- num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh, ibool_interfaces_ext_mesh, &
- hprime_xx, hprime_yy, hprime_zz, &
- hprimewgll_xx, hprimewgll_yy, hprimewgll_zz, &
- wgllwgll_xy, wgllwgll_xz, wgllwgll_yz, &
- ABSORBING_CONDITIONS, &
- abs_boundary_ispec, abs_boundary_ijk, &
- abs_boundary_normal, &
- abs_boundary_jacobian2Dw, &
- num_abs_boundary_faces, &
- ispec_is_inner, &
- NSOURCES, nsources_local, &
- sourcearrays, islice_selected_source, ispec_selected_source, &
- number_receiver_global, ispec_selected_rec, &
- nrec, nrec_local, &
- SIMULATION_TYPE, &
- USE_MESH_COLORING_GPU, &
- nspec_acoustic,nspec_elastic)
-
-
- ! prepares fields on GPU for acoustic simulations
- if( ACOUSTIC_SIMULATION ) then
- call prepare_fields_acoustic_device(Mesh_pointer,rmass_acoustic,rhostore,kappastore, &
- num_phase_ispec_acoustic,phase_ispec_inner_acoustic, &
- ispec_is_acoustic, &
- NOISE_TOMOGRAPHY,num_free_surface_faces, &
- free_surface_ispec,free_surface_ijk, &
- ABSORBING_CONDITIONS,b_reclen_potential,b_absorb_potential, &
- ELASTIC_SIMULATION, num_coupling_ac_el_faces, &
- coupling_ac_el_ispec,coupling_ac_el_ijk, &
- coupling_ac_el_normal,coupling_ac_el_jacobian2Dw, &
- num_colors_outer_acoustic,num_colors_inner_acoustic, &
- num_elem_colors_acoustic)
-
- if( SIMULATION_TYPE == 3 ) &
- call prepare_fields_acoustic_adj_dev(Mesh_pointer, &
- SIMULATION_TYPE, &
- APPROXIMATE_HESS_KL)
-
- endif
-
- ! prepares fields on GPU for elastic simulations
- if( ELASTIC_SIMULATION ) then
- call prepare_fields_elastic_device(Mesh_pointer, NDIM*NGLOB_AB, &
- rmass,rho_vp,rho_vs, &
- num_phase_ispec_elastic,phase_ispec_inner_elastic, &
- ispec_is_elastic, &
- ABSORBING_CONDITIONS,b_absorb_field,b_reclen_field, &
- SIMULATION_TYPE,SAVE_FORWARD, &
- COMPUTE_AND_STORE_STRAIN, &
- epsilondev_xx,epsilondev_yy,epsilondev_xy, &
- epsilondev_xz,epsilondev_yz, &
- ATTENUATION, &
- size(R_xx), &
- R_xx,R_yy,R_xy,R_xz,R_yz, &
- one_minus_sum_beta,factor_common, &
- alphaval,betaval,gammaval, &
- OCEANS,rmass_ocean_load, &
- NOISE_TOMOGRAPHY, &
- free_surface_normal,free_surface_ispec,free_surface_ijk, &
- num_free_surface_faces, &
- ACOUSTIC_SIMULATION, &
- num_colors_outer_elastic,num_colors_inner_elastic, &
- num_elem_colors_elastic, &
- ANISOTROPY, &
- c11store,c12store,c13store,c14store,c15store,c16store, &
- c22store,c23store,c24store,c25store,c26store, &
- c33store,c34store,c35store,c36store, &
- c44store,c45store,c46store,c55store,c56store,c66store)
-
- if( SIMULATION_TYPE == 3 ) &
- call prepare_fields_elastic_adj_dev(Mesh_pointer, NDIM*NGLOB_AB, &
- SIMULATION_TYPE, &
- COMPUTE_AND_STORE_STRAIN, &
- epsilon_trace_over_3, &
- b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
- b_epsilondev_xz,b_epsilondev_yz, &
- b_epsilon_trace_over_3, &
- ATTENUATION,size(R_xx), &
- b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
- b_alphaval,b_betaval,b_gammaval, &
- APPROXIMATE_HESS_KL)
-
- endif
-
- ! prepares needed receiver array for adjoint runs
- if( SIMULATION_TYPE == 2 .or. SIMULATION_TYPE == 3 ) &
- call prepare_sim2_or_3_const_device(Mesh_pointer, &
- islice_selected_rec,size(islice_selected_rec), &
- nadj_rec_local,nrec,myrank)
-
- ! prepares fields on GPU for noise simulations
- if ( NOISE_TOMOGRAPHY > 0 ) then
- ! note: noise tomography is only supported for elastic domains so far.
-
- ! copies noise arrays to GPU
- call prepare_fields_noise_device(Mesh_pointer, NSPEC_AB, NGLOB_AB, &
- free_surface_ispec, &
- free_surface_ijk, &
- num_free_surface_faces, &
- SIMULATION_TYPE,NOISE_TOMOGRAPHY, &
- NSTEP,noise_sourcearray, &
- normal_x_noise,normal_y_noise,normal_z_noise, &
- mask_noise,free_surface_jacobian2Dw)
-
- endif ! NOISE_TOMOGRAPHY
-
- ! prepares gravity arrays
- if( GRAVITY ) then
- call prepare_fields_gravity_device(Mesh_pointer,GRAVITY, &
- minus_deriv_gravity,minus_g,wgll_cube,&
- ACOUSTIC_SIMULATION,rhostore)
- endif
-
- ! sends initial data to device
-
- ! puts acoustic initial fields onto GPU
- if( ACOUSTIC_SIMULATION ) then
- call transfer_fields_ac_to_device(NGLOB_AB,potential_acoustic, &
- potential_dot_acoustic,potential_dot_dot_acoustic,Mesh_pointer)
- if( SIMULATION_TYPE == 3 ) &
- call transfer_b_fields_ac_to_device(NGLOB_AB,b_potential_acoustic, &
- b_potential_dot_acoustic,b_potential_dot_dot_acoustic,Mesh_pointer)
- endif
-
- ! puts elastic initial fields onto GPU
- if( ELASTIC_SIMULATION ) then
- ! transfer forward and backward fields to device with initial values
- call transfer_fields_el_to_device(NDIM*NGLOB_AB,displ,veloc,accel,Mesh_pointer)
- if(SIMULATION_TYPE == 3) &
- call transfer_b_fields_to_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel,Mesh_pointer)
- endif
-
- ! outputs GPU usage to files for all processes
- call output_free_device_memory(myrank)
-
- ! outputs usage for main process
- if( myrank == 0 ) then
- write(IMAIN,*)" GPU number of devices per node: min =",ncuda_devices_min
- write(IMAIN,*)" max =",ncuda_devices_max
- write(IMAIN,*)
-
- call get_free_device_memory(free_mb,used_mb,total_mb)
- write(IMAIN,*)" GPU usage: free =",free_mb," MB",nint(free_mb/total_mb*100.0),"%"
- write(IMAIN,*)" used =",used_mb," MB",nint(used_mb/total_mb*100.0),"%"
- write(IMAIN,*)" total =",total_mb," MB",nint(total_mb/total_mb*100.0),"%"
- write(IMAIN,*)
- endif
-
- end subroutine prepare_timerun_GPU
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/read_mesh_databases.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/read_mesh_databases.f90 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/read_mesh_databases.f90 2012-02-25 01:37:33 UTC (rev 19680)
@@ -35,15 +35,7 @@
implicit none
real(kind=CUSTOM_REAL):: minl,maxl,min_all,max_all
integer :: ier,inum
- integer NUM_THREADS
- integer OMP_GET_MAX_THREADS
-
- NUM_THREADS = OMP_GET_MAX_THREADS()
- if( myrank == 0 ) then
- write(IMAIN,*) 'Using:',NUM_THREADS, ' OpenMP threads'
- endif
-
-
+
! start reading the databasesa
! info about external mesh simulation
@@ -122,30 +114,6 @@
allocate(accel(NDIM,NGLOB_AB),stat=ier)
if( ier /= 0 ) stop 'error allocating array accel'
- ! allocate cfe_Dev_openmp local arrays for OpenMP version
- allocate(dummyx_loc(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
- allocate(dummyy_loc(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
- allocate(dummyz_loc(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
- allocate(newtempx1(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
- allocate(newtempx2(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
- allocate(newtempx3(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
- allocate(newtempy1(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
- allocate(newtempy2(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
- allocate(newtempy3(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
- allocate(newtempz1(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
- allocate(newtempz2(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
- allocate(newtempz3(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
- allocate(tempx1(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
- allocate(tempx2(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
- allocate(tempx3(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
- allocate(tempy1(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
- allocate(tempy2(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
- allocate(tempy3(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
- allocate(tempz1(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
- allocate(tempz2(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
- allocate(tempz3(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
-
-
allocate(rmass(NGLOB_AB),stat=ier)
if( ier /= 0 ) stop 'error allocating array rmass'
allocate(rho_vp(NGLLX,NGLLY,NGLLZ,NSPEC_AB),stat=ier)
@@ -204,7 +172,7 @@
! reads mass matrices
read(27,iostat=ier) rmass
if( ier /= 0 ) stop 'error reading in array rmass'
-
+
if( OCEANS ) then
! ocean mass matrix
allocate(rmass_ocean_load(NGLOB_AB),stat=ier)
@@ -221,24 +189,24 @@
if( ier /= 0 ) stop 'error reading in array rho_vp'
read(27,iostat=ier) rho_vs
if( ier /= 0 ) stop 'error reading in array rho_vs'
-
+
! checks if rhostore is available for gravity
if( GRAVITY ) then
-
+
if( .not. ACOUSTIC_SIMULATION ) then
! rho array needed for gravity
allocate(rhostore(NGLLX,NGLLY,NGLLZ,NSPEC_AB),stat=ier)
if( ier /= 0 ) stop 'error allocating array rhostore'
-
+
! extract rho information from mu = rho * vs * vs and rho_vs = rho * vs
rhostore = 0.0_CUSTOM_REAL
- where( mustore > TINYVAL )
+ where( mustore > TINYVAL )
rhostore = (rho_vs*rho_vs) / mustore
endwhere
! note: the construct below leads to a segmentation fault (ifort v11.1). not sure why...
! (where statement - standard fortran 95)
- !where( mustore > TINYVAL )
+ !where( mustore > TINYVAL )
! rhostore = (rho_vs*rho_vs) / mustore
!elsewhere
! rhostore = 0.0_CUSTOM_REAL
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D_par.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D_par.f90 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D_par.f90 2012-02-25 01:37:33 UTC (rev 19680)
@@ -288,7 +288,7 @@
dummyx_loc,dummyy_loc,dummyz_loc,newtempx1,newtempx2,newtempx3,&
newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3,&
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
-
+
! mass matrix
real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmass
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_seismograms.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_seismograms.f90 2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_seismograms.f90 2012-02-25 01:37:33 UTC (rev 19680)
@@ -91,7 +91,8 @@
hgammar(:) = hgammar_store(irec_local,:)
! forward simulations
- if (SIMULATION_TYPE == 1) then
+ select case( SIMULATION_TYPE )
+ case( 1 )
! receiver's spectral element
ispec = ispec_selected_rec(irec)
@@ -132,7 +133,7 @@
endif ! acoustic
!adjoint simulations
- else if (SIMULATION_TYPE == 2) then
+ case( 2 )
! adjoint source is placed at receiver
ispec = ispec_selected_source(irec)
@@ -208,7 +209,7 @@
endif ! acoustic
!adjoint simulations
- else if (SIMULATION_TYPE == 3) then
+ case( 3 )
ispec = ispec_selected_rec(irec)
@@ -247,7 +248,7 @@
dxd,dyd,dzd,vxd,vyd,vzd,axd,ayd,azd)
endif ! acoustic
- endif ! SIMULATION_TYPE
+ end select ! SIMULATION_TYPE
! store North, East and Vertical components
! distinguish between single and double precision for reals
@@ -269,12 +270,9 @@
! write the current or final seismograms
if((mod(it,NTSTEP_BETWEEN_OUTPUT_SEISMOS) == 0 .or. it == NSTEP) .and. (.not.SU_FORMAT)) then
if (SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) then
- call write_seismograms_to_file(myrank,seismograms_d,number_receiver_global,station_name, &
- network_name,nrec,nrec_local,it,DT,NSTEP,t0,LOCAL_PATH,1,SIMULATION_TYPE)
- call write_seismograms_to_file(myrank,seismograms_v,number_receiver_global,station_name, &
- network_name,nrec,nrec_local,it,DT,NSTEP,t0,LOCAL_PATH,2,SIMULATION_TYPE)
- call write_seismograms_to_file(myrank,seismograms_a,number_receiver_global,station_name, &
- network_name,nrec,nrec_local,it,DT,NSTEP,t0,LOCAL_PATH,3,SIMULATION_TYPE)
+ call write_seismograms_to_file(seismograms_d,1)
+ call write_seismograms_to_file(seismograms_v,2)
+ call write_seismograms_to_file(seismograms_a,3)
else
call write_adj_seismograms_to_file(myrank,seismograms_d,number_receiver_global, &
nrec_local,it,DT,NSTEP,t0,LOCAL_PATH,1)
@@ -294,28 +292,19 @@
! write seismograms to text files
- subroutine write_seismograms_to_file(myrank,seismograms,number_receiver_global, &
- station_name,network_name,nrec,nrec_local, &
- it,DT,NSTEP,t0,LOCAL_PATH,istore,SIMULATION_TYPE)
+ subroutine write_seismograms_to_file(seismograms,istore)
+ use constants
+ use specfem_par,only: &
+ myrank,number_receiver_global,station_name,network_name, &
+ nrec,nrec_local,islice_selected_rec, &
+ it,DT,NSTEP,t0,LOCAL_PATH,SIMULATION_TYPE
+
implicit none
- include "constants.h"
-
- integer :: NSTEP,it
- integer :: nrec,nrec_local
- integer :: myrank,istore
- integer :: SIMULATION_TYPE
-
- integer, dimension(nrec_local) :: number_receiver_global
+ integer :: istore
real(kind=CUSTOM_REAL), dimension(NDIM,nrec_local,NSTEP) :: seismograms
- double precision t0,DT
-
- character(len=256) LOCAL_PATH
- character(len=MAX_LENGTH_STATION_NAME), dimension(nrec) :: station_name
- character(len=MAX_LENGTH_NETWORK_NAME), dimension(nrec) :: network_name
-
! local parameters
integer irec,irec_local
integer irecord
@@ -327,6 +316,7 @@
integer :: nrec_local_received,NPROCTOT,total_seismos,receiver,sender
integer :: iproc,ier
integer,dimension(1) :: tmp_nrec_local_received,tmp_irec,tmp_nrec_local
+ integer,dimension(:),allocatable:: islice_num_rec_local
! saves displacement, velocity or acceleration
if(istore == 1) then
@@ -374,8 +364,22 @@
! loop on all the slices
call world_size(NPROCTOT)
+
+ ! counts number of local receivers for each slice
+ allocate(islice_num_rec_local(0:NPROCTOT-1),stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating islice_num_rec_local')
+ islice_num_rec_local(:) = 0
+ do irec = 1,nrec
+ iproc = islice_selected_rec(irec)
+ islice_num_rec_local(iproc) = islice_num_rec_local(iproc) + 1
+ enddo
+
+ ! loops on all the slices
do iproc = 0,NPROCTOT-1
+ ! communicate only with processes which contain local receivers
+ if( islice_num_rec_local(iproc) == 0 ) cycle
+
! receive except from proc 0, which is me and therefore I already have this value
sender = iproc
if(iproc /= 0) then
@@ -415,6 +419,7 @@
enddo ! nrec_local_received
endif ! if(nrec_local_received > 0 )
enddo ! NPROCTOT-1
+ deallocate(islice_num_rec_local)
write(IMAIN,*) 'Component: .sem'//component
write(IMAIN,*) ' total number of receivers saved is ',total_seismos,' out of ',nrec
Added: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/create_specfem3D_gpu_cuda_method_stubs.pl
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/create_specfem3D_gpu_cuda_method_stubs.pl (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/create_specfem3D_gpu_cuda_method_stubs.pl 2012-02-25 01:37:33 UTC (rev 19680)
@@ -0,0 +1,141 @@
+#!/usr/bin/perl
+
+#
+# Script to extract the function declarations in cuda files
+#
+#
+# usage: ./ceate_specfem3D_gpu_cuda_method_stubs.pl
+# run in directory root SPECFEM3D/
+#
+
+$outfile = "src/cuda/specfem3D_gpu_cuda_method_stubs.c";
+
+
+open(IOUT,"> _____temp_tutu_____");
+
+$header = <<END;
+/*
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "config.h"
+
+typedef float realw;
+
+END
+
+
+$warning = <<END;
+ fprintf(stderr,"ERROR: GPU_MODE enabled without GPU/CUDA Support. To enable GPU support, reconfigure with --with-cuda flag.\\n");
+ exit(1);
+END
+
+print IOUT "$header \n";
+
+$success = 0;
+
+ at objects = `ls src/cuda/*.cu`;
+
+foreach $name (@objects) {
+ chop $name;
+ print "extracting word in file $name ...\n";
+
+ print IOUT "\n//\n// $name\n//\n\n";
+
+ # change tabs to white spaces
+ system("expand -2 < $name > _____temp_tutu01_____");
+ open(IIN,"<_____temp_tutu01_____");
+
+
+ # open the source file
+ $success = 1;
+ $do_extract = 0;
+ while($line = <IIN>) {
+ chop $line;
+
+ # suppress trailing white spaces and carriage return
+ $line =~ s/\s*$//;
+
+ # change the version number and copyright information
+ # $line =~ s#\(c\) California Institute of Technology and University of Pau, October 2007#\(c\) California Institute of Technology and University of Pau, November 2007#og;
+ # $line =~ s#rmass_sigma#rmass_time_integral_of_sigma#og;
+
+ if($line =~ /extern "C"/){
+ # new function declaration starts
+ #print "$line\n";
+ if( $line =~/FC_FUNC/ ){
+ # function declaration on same line as extern, ask for line skip
+ print "problem: please add a line break after extern 'C' here:";
+ print "$line\n";
+ $success = 0;
+ close(IIN);
+ exit;
+ }
+ $do_extract = 1;
+ next;
+ }
+
+ # extract section
+ if($do_extract == 1 ){
+ # function declaration
+ if($line =~ /{/){
+ # function declaration ends
+ if( $line =~ /PREPARE_CUDA_DEVICE/ ){
+ # adds warning
+ print IOUT "$line \n$warning\} \n\n";
+ }else{
+ print IOUT "$line\} \n\n";
+ }
+ $do_extract = 0;
+ }else{
+ # write line to the output file
+ print IOUT "$line\n";
+ }
+ next;
+ }
+ }
+ close(IIN);
+
+ if( $success == 0 ){ exit; }
+}
+
+close(IOUT);
+system("rm -f _____temp_tutu01_____");
+
+# creates new stubs file if successful
+if( $success == 1 ){
+ print "\n\nsuccessfully extracted declarations \n\n";
+ system("cp -p $outfile $outfile.bak");
+ system("cp -p _____temp_tutu_____ $outfile");
+ print "created new: $outfile \n";
+}
+system("rm -f _____temp_tutu_____");
+
+
Property changes on: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/create_specfem3D_gpu_cuda_method_stubs.pl
___________________________________________________________________
Name: svn:executable
+ *
More information about the CIG-COMMITS
mailing list