[cig-commits] r19027 - in seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER: . examples/homogeneous_halfspace/in_data_files examples/layered_halfspace/in_data_files examples/meshfem3D_examples/many_interfaces examples/meshfem3D_examples/simple_model examples/tomographic_model/in_data_files examples/waterlayered_halfspace/in_data_files in_data_files src src/check_mesh_quality_CUBIT_Abaqus src/cuda src/shared src/specfem3D utils/unused_routines/create_movie_GMT utils/unused_routines/files_needed_asteroid/DATA utils/unused_routines/from_old_DATA/par_files
danielpeter at geodynamics.org
danielpeter at geodynamics.org
Wed Oct 5 20:31:25 PDT 2011
Author: danielpeter
Date: 2011-10-05 20:31:24 -0700 (Wed, 05 Oct 2011)
New Revision: 19027
Added:
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/check_mesh_quality_CUBIT_Abaqus/constants.h
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/check_fields_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_add_sources_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_acoustic_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_elastic_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_kernels_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/mesh_constants_cuda.h
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/noise_tomography_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_constants_cuda.h
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/save_and_compare_cpu_vs_gpu.c
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/transfer_fields_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/write_seismograms_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_boundary_kernel.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_kernels.f90
Removed:
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/compute_boundary_kernel.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/compute_kernels.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/save_and_compare_cpu_vs_gpu.c
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_kernels_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_stacey_elastic_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/it_update_displacement_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/mesh_constants_cuda.h
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/noise_tomography_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_constants_cuda.h
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_mesh_constants_cuda.cu
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_seismograms_cuda.cu
Modified:
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/configure
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/homogeneous_halfspace/in_data_files/Par_file
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/layered_halfspace/in_data_files/Par_file
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/meshfem3D_examples/many_interfaces/Par_file
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/meshfem3D_examples/simple_model/Par_file
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/tomographic_model/in_data_files/Par_file
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/waterlayered_halfspace/in_data_files/Par_file
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/in_data_files/Par_file.in
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/assemble_MPI_scalar.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/combine_vol_data.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/read_parameter_file.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/Makefile.in
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/assemble_MPI_vector.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_acoustic.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_acoustic.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_stacey_acoustic.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_stacey_elastic.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/finalize_simulation.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/initialize_simulation.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/iterate_time.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_movie_output.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_seismograms.f90
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/create_movie_GMT/Par_file
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/files_needed_asteroid/DATA/Par_file
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/from_old_DATA/par_files/Par_file_Carcione_copper_ori_r2d2_serial
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/from_old_DATA/par_files/Par_file_Carcione_copper_very_small_serial
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/from_old_DATA/par_files/Par_file_EGEE_gros
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/from_old_DATA/par_files/Par_file_EGEE_petit
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/from_old_DATA/par_files/Par_file_EGEE_small_4
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/from_old_DATA/par_files/Par_file_SC_288
seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/from_old_DATA/par_files/Par_file_SC_384
Log:
updates for acoustic routines, configure and Makefile.in
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/configure
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/configure 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/configure 2011-10-06 03:31:24 UTC (rev 19027)
@@ -1,24 +1,20 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.66 for Specfem 3D 2.0.1.
+# Generated by GNU Autoconf 2.63 for Specfem 3D 2.0.1.
#
# Report bugs to <jtromp AT princeton.edu>.
#
-#
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
-# 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software
-# Foundation, Inc.
-#
-#
+# 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
# This configure script is free software; the Free Software Foundation
# gives unlimited permission to copy, distribute and modify it.
-## -------------------- ##
-## M4sh Initialization. ##
-## -------------------- ##
+## --------------------- ##
+## M4sh Initialization. ##
+## --------------------- ##
# Be more Bourne compatible
DUALCASE=1; export DUALCASE # for MKS sh
-if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then :
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
emulate sh
NULLCMD=:
# Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
@@ -26,15 +22,23 @@
alias -g '${1+"$@"}'='"$@"'
setopt NO_GLOB_SUBST
else
- case `(set -o) 2>/dev/null` in #(
- *posix*) :
- set -o posix ;; #(
- *) :
- ;;
+ case `(set -o) 2>/dev/null` in
+ *posix*) set -o posix ;;
esac
+
fi
+
+
+# PATH needs CR
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
as_nl='
'
export as_nl
@@ -42,13 +46,7 @@
as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo
as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo
-# Prefer a ksh shell builtin over an external printf program on Solaris,
-# but without wasting forks for bash or zsh.
-if test -z "$BASH_VERSION$ZSH_VERSION" \
- && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then
- as_echo='print -r --'
- as_echo_n='print -rn --'
-elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then
+if (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then
as_echo='printf %s\n'
as_echo_n='printf %s'
else
@@ -59,7 +57,7 @@
as_echo_body='eval expr "X$1" : "X\\(.*\\)"'
as_echo_n_body='eval
arg=$1;
- case $arg in #(
+ case $arg in
*"$as_nl"*)
expr "X$arg" : "X\\(.*\\)$as_nl";
arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;;
@@ -82,7 +80,14 @@
}
fi
+# Support unset when possible.
+if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then
+ as_unset=unset
+else
+ as_unset=false
+fi
+
# IFS
# We need space, tab and new line, in precisely that order. Quoting is
# there to prevent editors from complaining about space-tab.
@@ -91,15 +96,15 @@
IFS=" "" $as_nl"
# Find who we are. Look in the path if we contain no directory separator.
-case $0 in #((
+case $0 in
*[\\/]* ) as_myself=$0 ;;
*) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
- done
+ test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+done
IFS=$as_save_IFS
;;
@@ -111,16 +116,12 @@
fi
if test ! -f "$as_myself"; then
$as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2
- exit 1
+ { (exit 1); exit 1; }
fi
-# Unset variables that we do not need and which cause bugs (e.g. in
-# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1"
-# suppresses any "Segmentation fault" message there. '((' could
-# trigger a bug in pdksh 5.2.14.
-for as_var in BASH_ENV ENV MAIL MAILPATH
-do eval test x\${$as_var+set} = xset \
- && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || :
+# Work around bugs in pre-3.0 UWIN ksh.
+for as_var in ENV MAIL MAILPATH
+do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var
done
PS1='$ '
PS2='> '
@@ -132,300 +133,330 @@
LANGUAGE=C
export LANGUAGE
+# Required to use basename.
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+ test "X`expr 00001 : '.*\(...\)'`" = X001; then
+ as_expr=expr
+else
+ as_expr=false
+fi
+
+if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
+ as_basename=basename
+else
+ as_basename=false
+fi
+
+
+# Name of the executable.
+as_me=`$as_basename -- "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+ X"$0" : 'X\(//\)$' \| \
+ X"$0" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X/"$0" |
+ sed '/^.*\/\([^/][^/]*\)\/*$/{
+ s//\1/
+ q
+ }
+ /^X\/\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\/\(\/\).*/{
+ s//\1/
+ q
+ }
+ s/.*/./; q'`
+
# CDPATH.
-(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+$as_unset CDPATH
+
if test "x$CONFIG_SHELL" = x; then
- as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then :
- emulate sh
- NULLCMD=:
- # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which
- # is contrary to our usage. Disable this feature.
- alias -g '\${1+\"\$@\"}'='\"\$@\"'
- setopt NO_GLOB_SUBST
+ if (eval ":") 2>/dev/null; then
+ as_have_required=yes
else
- case \`(set -o) 2>/dev/null\` in #(
- *posix*) :
- set -o posix ;; #(
- *) :
- ;;
-esac
+ as_have_required=no
fi
-"
- as_required="as_fn_return () { (exit \$1); }
-as_fn_success () { as_fn_return 0; }
-as_fn_failure () { as_fn_return 1; }
-as_fn_ret_success () { return 0; }
-as_fn_ret_failure () { return 1; }
+ if test $as_have_required = yes && (eval ":
+(as_func_return () {
+ (exit \$1)
+}
+as_func_success () {
+ as_func_return 0
+}
+as_func_failure () {
+ as_func_return 1
+}
+as_func_ret_success () {
+ return 0
+}
+as_func_ret_failure () {
+ return 1
+}
+
exitcode=0
-as_fn_success || { exitcode=1; echo as_fn_success failed.; }
-as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; }
-as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; }
-as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; }
-if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then :
+if as_func_success; then
+ :
+else
+ exitcode=1
+ echo as_func_success failed.
+fi
+if as_func_failure; then
+ exitcode=1
+ echo as_func_failure succeeded.
+fi
+
+if as_func_ret_success; then
+ :
else
- exitcode=1; echo positional parameters were not saved.
+ exitcode=1
+ echo as_func_ret_success failed.
fi
-test x\$exitcode = x0 || exit 1"
- as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO
- as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO
- eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" &&
- test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1
-test \$(( 1 + 1 )) = 2 || exit 1"
- if (eval "$as_required") 2>/dev/null; then :
- as_have_required=yes
+
+if as_func_ret_failure; then
+ exitcode=1
+ echo as_func_ret_failure succeeded.
+fi
+
+if ( set x; as_func_ret_success y && test x = \"\$1\" ); then
+ :
else
- as_have_required=no
+ exitcode=1
+ echo positional parameters were not saved.
fi
- if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then :
+test \$exitcode = 0) || { (exit 1); exit 1; }
+
+(
+ as_lineno_1=\$LINENO
+ as_lineno_2=\$LINENO
+ test \"x\$as_lineno_1\" != \"x\$as_lineno_2\" &&
+ test \"x\`expr \$as_lineno_1 + 1\`\" = \"x\$as_lineno_2\") || { (exit 1); exit 1; }
+") 2> /dev/null; then
+ :
else
- as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-as_found=false
+ as_candidate_shells=
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- as_found=:
- case $as_dir in #(
+ case $as_dir in
/*)
for as_base in sh bash ksh sh5; do
- # Try only shells that exist, to save several forks.
- as_shell=$as_dir/$as_base
- if { test -f "$as_shell" || test -f "$as_shell.exe"; } &&
- { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then :
- CONFIG_SHELL=$as_shell as_have_required=yes
- if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then :
- break 2
-fi
-fi
+ as_candidate_shells="$as_candidate_shells $as_dir/$as_base"
done;;
esac
- as_found=false
done
-$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } &&
- { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then :
- CONFIG_SHELL=$SHELL as_have_required=yes
-fi; }
IFS=$as_save_IFS
- if test "x$CONFIG_SHELL" != x; then :
- # We cannot yet assume a decent shell, so we have to provide a
- # neutralization value for shells without unset; and this also
- # works around shells that cannot unset nonexistent variables.
- BASH_ENV=/dev/null
- ENV=/dev/null
- (unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
- export CONFIG_SHELL
- exec "$CONFIG_SHELL" "$as_myself" ${1+"$@"}
+ for as_shell in $as_candidate_shells $SHELL; do
+ # Try only shells that exist, to save several forks.
+ if { test -f "$as_shell" || test -f "$as_shell.exe"; } &&
+ { ("$as_shell") 2> /dev/null <<\_ASEOF
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
+ emulate sh
+ NULLCMD=:
+ # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
+ # is contrary to our usage. Disable this feature.
+ alias -g '${1+"$@"}'='"$@"'
+ setopt NO_GLOB_SUBST
+else
+ case `(set -o) 2>/dev/null` in
+ *posix*) set -o posix ;;
+esac
+
fi
- if test x$as_have_required = xno; then :
- $as_echo "$0: This script requires a shell more modern than all"
- $as_echo "$0: the shells that I found on your system."
- if test x${ZSH_VERSION+set} = xset ; then
- $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should"
- $as_echo "$0: be upgraded to zsh 4.3.4 or later."
- else
- $as_echo "$0: Please tell bug-autoconf at gnu.org and jtromp AT
-$0: princeton.edu about your system, including any error
-$0: possibly output before this message. Then install a
-$0: modern shell, or manually run the script under such a
-$0: shell if you do have one."
- fi
- exit 1
+
+:
+_ASEOF
+}; then
+ CONFIG_SHELL=$as_shell
+ as_have_required=yes
+ if { "$as_shell" 2> /dev/null <<\_ASEOF
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
+ emulate sh
+ NULLCMD=:
+ # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
+ # is contrary to our usage. Disable this feature.
+ alias -g '${1+"$@"}'='"$@"'
+ setopt NO_GLOB_SUBST
+else
+ case `(set -o) 2>/dev/null` in
+ *posix*) set -o posix ;;
+esac
+
fi
+
+
+:
+(as_func_return () {
+ (exit $1)
+}
+as_func_success () {
+ as_func_return 0
+}
+as_func_failure () {
+ as_func_return 1
+}
+as_func_ret_success () {
+ return 0
+}
+as_func_ret_failure () {
+ return 1
+}
+
+exitcode=0
+if as_func_success; then
+ :
+else
+ exitcode=1
+ echo as_func_success failed.
fi
+
+if as_func_failure; then
+ exitcode=1
+ echo as_func_failure succeeded.
fi
-SHELL=${CONFIG_SHELL-/bin/sh}
-export SHELL
-# Unset more variables known to interfere with behavior of common tools.
-CLICOLOR_FORCE= GREP_OPTIONS=
-unset CLICOLOR_FORCE GREP_OPTIONS
-## --------------------- ##
-## M4sh Shell Functions. ##
-## --------------------- ##
-# as_fn_unset VAR
-# ---------------
-# Portably unset VAR.
-as_fn_unset ()
-{
- { eval $1=; unset $1;}
-}
-as_unset=as_fn_unset
+if as_func_ret_success; then
+ :
+else
+ exitcode=1
+ echo as_func_ret_success failed.
+fi
-# as_fn_set_status STATUS
-# -----------------------
-# Set $? to STATUS, without forking.
-as_fn_set_status ()
-{
- return $1
-} # as_fn_set_status
+if as_func_ret_failure; then
+ exitcode=1
+ echo as_func_ret_failure succeeded.
+fi
-# as_fn_exit STATUS
-# -----------------
-# Exit the shell with STATUS, even in a "trap 0" or "set -e" context.
-as_fn_exit ()
-{
- set +e
- as_fn_set_status $1
- exit $1
-} # as_fn_exit
+if ( set x; as_func_ret_success y && test x = "$1" ); then
+ :
+else
+ exitcode=1
+ echo positional parameters were not saved.
+fi
-# as_fn_mkdir_p
-# -------------
-# Create "$as_dir" as a directory, including parents if necessary.
-as_fn_mkdir_p ()
-{
+test $exitcode = 0) || { (exit 1); exit 1; }
- case $as_dir in #(
- -*) as_dir=./$as_dir;;
- esac
- test -d "$as_dir" || eval $as_mkdir_p || {
- as_dirs=
- while :; do
- case $as_dir in #(
- *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'(
- *) as_qdir=$as_dir;;
- esac
- as_dirs="'$as_qdir' $as_dirs"
- as_dir=`$as_dirname -- "$as_dir" ||
-$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$as_dir" : 'X\(//\)[^/]' \| \
- X"$as_dir" : 'X\(//\)$' \| \
- X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
-$as_echo X"$as_dir" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
- s//\1/
- q
- }
- /^X\(\/\/\)[^/].*/{
- s//\1/
- q
- }
- /^X\(\/\/\)$/{
- s//\1/
- q
- }
- /^X\(\/\).*/{
- s//\1/
- q
- }
- s/.*/./; q'`
- test -d "$as_dir" && break
- done
- test -z "$as_dirs" || eval "mkdir $as_dirs"
- } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir"
+(
+ as_lineno_1=$LINENO
+ as_lineno_2=$LINENO
+ test "x$as_lineno_1" != "x$as_lineno_2" &&
+ test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2") || { (exit 1); exit 1; }
+_ASEOF
+}; then
+ break
+fi
-} # as_fn_mkdir_p
-# as_fn_append VAR VALUE
-# ----------------------
-# Append the text in VALUE to the end of the definition contained in VAR. Take
-# advantage of any shell optimizations that allow amortized linear growth over
-# repeated appends, instead of the typical quadratic growth present in naive
-# implementations.
-if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then :
- eval 'as_fn_append ()
- {
- eval $1+=\$2
- }'
-else
- as_fn_append ()
- {
- eval $1=\$$1\$2
- }
-fi # as_fn_append
+fi
-# as_fn_arith ARG...
-# ------------------
-# Perform arithmetic evaluation on the ARGs, and store the result in the
-# global $as_val. Take advantage of shells that can avoid forks. The arguments
-# must be portable across $(()) and expr.
-if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then :
- eval 'as_fn_arith ()
- {
- as_val=$(( $* ))
- }'
-else
- as_fn_arith ()
- {
- as_val=`expr "$@" || test $? -eq 1`
- }
-fi # as_fn_arith
+ done
+ if test "x$CONFIG_SHELL" != x; then
+ for as_var in BASH_ENV ENV
+ do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var
+ done
+ export CONFIG_SHELL
+ exec "$CONFIG_SHELL" "$as_myself" ${1+"$@"}
+fi
-# as_fn_error STATUS ERROR [LINENO LOG_FD]
-# ----------------------------------------
-# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
-# provided, also output the error to LOG_FD, referencing LINENO. Then exit the
-# script with STATUS, using 1 if that was 0.
-as_fn_error ()
-{
- as_status=$1; test $as_status -eq 0 && as_status=1
- if test "$4"; then
- as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4
- fi
- $as_echo "$as_me: error: $2" >&2
- as_fn_exit $as_status
-} # as_fn_error
-if expr a : '\(a\)' >/dev/null 2>&1 &&
- test "X`expr 00001 : '.*\(...\)'`" = X001; then
- as_expr=expr
+ if test $as_have_required = no; then
+ echo This script requires a shell more modern than all the
+ echo shells that I found on your system. Please install a
+ echo modern shell, or manually run the script under such a
+ echo shell if you do have one.
+ { (exit 1); exit 1; }
+fi
+
+
+fi
+
+fi
+
+
+
+(eval "as_func_return () {
+ (exit \$1)
+}
+as_func_success () {
+ as_func_return 0
+}
+as_func_failure () {
+ as_func_return 1
+}
+as_func_ret_success () {
+ return 0
+}
+as_func_ret_failure () {
+ return 1
+}
+
+exitcode=0
+if as_func_success; then
+ :
else
- as_expr=false
+ exitcode=1
+ echo as_func_success failed.
fi
-if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
- as_basename=basename
+if as_func_failure; then
+ exitcode=1
+ echo as_func_failure succeeded.
+fi
+
+if as_func_ret_success; then
+ :
else
- as_basename=false
+ exitcode=1
+ echo as_func_ret_success failed.
fi
-if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
- as_dirname=dirname
+if as_func_ret_failure; then
+ exitcode=1
+ echo as_func_ret_failure succeeded.
+fi
+
+if ( set x; as_func_ret_success y && test x = \"\$1\" ); then
+ :
else
- as_dirname=false
+ exitcode=1
+ echo positional parameters were not saved.
fi
-as_me=`$as_basename -- "$0" ||
-$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
- X"$0" : 'X\(//\)$' \| \
- X"$0" : 'X\(/\)' \| . 2>/dev/null ||
-$as_echo X/"$0" |
- sed '/^.*\/\([^/][^/]*\)\/*$/{
- s//\1/
- q
- }
- /^X\/\(\/\/\)$/{
- s//\1/
- q
- }
- /^X\/\(\/\).*/{
- s//\1/
- q
- }
- s/.*/./; q'`
+test \$exitcode = 0") || {
+ echo No shell found that supports shell functions.
+ echo Please tell bug-autoconf at gnu.org about your system,
+ echo including any error possibly output before this message.
+ echo This can help us improve future autoconf versions.
+ echo Configuration will now proceed without shell functions.
+}
-# Avoid depending upon Character Ranges.
-as_cr_letters='abcdefghijklmnopqrstuvwxyz'
-as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
-as_cr_Letters=$as_cr_letters$as_cr_LETTERS
-as_cr_digits='0123456789'
-as_cr_alnum=$as_cr_Letters$as_cr_digits
- as_lineno_1=$LINENO as_lineno_1a=$LINENO
- as_lineno_2=$LINENO as_lineno_2a=$LINENO
- eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" &&
- test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || {
- # Blame Lee E. McMahon (1931-1989) for sed's syntax. :-)
+ as_lineno_1=$LINENO
+ as_lineno_2=$LINENO
+ test "x$as_lineno_1" != "x$as_lineno_2" &&
+ test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || {
+
+ # Create $as_me.lineno as a copy of $as_myself, but with $LINENO
+ # uniformly replaced by the line number. The first 'sed' inserts a
+ # line-number line after each line using $LINENO; the second 'sed'
+ # does the real work. The second script uses 'N' to pair each
+ # line-number line with the line containing $LINENO, and appends
+ # trailing '-' during substitution so that $LINENO is not a special
+ # case at line end.
+ # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the
+ # scripts with optimization help from Paolo Bonzini. Blame Lee
+ # E. McMahon (1931-1989) for sed's syntax. :-)
sed -n '
p
/[$]LINENO/=
@@ -442,7 +473,8 @@
s/-\n.*//
' >$as_me.lineno &&
chmod +x "$as_me.lineno" ||
- { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; }
+ { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2
+ { (exit 1); exit 1; }; }
# Don't try to exec as it changes $[0], causing all sort of problems
# (the dirname of $[0] is not the place where we might find the
@@ -452,18 +484,29 @@
exit
}
+
+if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
+ as_dirname=dirname
+else
+ as_dirname=false
+fi
+
ECHO_C= ECHO_N= ECHO_T=
-case `echo -n x` in #(((((
+case `echo -n x` in
-n*)
- case `echo 'xy\c'` in
+ case `echo 'x\c'` in
*c*) ECHO_T=' ';; # ECHO_T is single tab character.
- xy) ECHO_C='\c';;
- *) echo `echo ksh88 bug on AIX 6.1` > /dev/null
- ECHO_T=' ';;
+ *) ECHO_C='\c';;
esac;;
*)
ECHO_N='-n';;
esac
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+ test "X`expr 00001 : '.*\(...\)'`" = X001; then
+ as_expr=expr
+else
+ as_expr=false
+fi
rm -f conf$$ conf$$.exe conf$$.file
if test -d conf$$.dir; then
@@ -493,7 +536,7 @@
rmdir conf$$.dir 2>/dev/null
if mkdir -p . 2>/dev/null; then
- as_mkdir_p='mkdir -p "$as_dir"'
+ as_mkdir_p=:
else
test -d ./-p && rmdir ./-p
as_mkdir_p=false
@@ -512,10 +555,10 @@
if test -d "$1"; then
test -d "$1/.";
else
- case $1 in #(
+ case $1 in
-*)set "./$1";;
esac;
- case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in #((
+ case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in
???[sx]*):;;*)false;;esac;fi
'\'' sh
'
@@ -529,11 +572,11 @@
as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
-test -n "$DJDIR" || exec 7<&0 </dev/null
-exec 6>&1
+exec 7<&0 </dev/null 6>&1
+
# Name of the host.
-# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status,
+# hostname on some systems (SVR3.2, Linux) returns a bogus exit status,
# so uname gets run too.
ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q`
@@ -548,6 +591,7 @@
subdirs=
MFLAGS=
MAKEFLAGS=
+SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='Specfem 3D'
@@ -555,7 +599,6 @@
PACKAGE_VERSION='2.0.1'
PACKAGE_STRING='Specfem 3D 2.0.1'
PACKAGE_BUGREPORT='jtromp AT princeton.edu'
-PACKAGE_URL=''
ac_unique_file="README"
# Factoring default headers for most tests.
@@ -601,6 +644,9 @@
PYTHON_EGG_CFLAGS
PYTHONPATH
LOCAL_PATH_IS_ALSO_GLOBAL
+MPI_INC
+CUDA_INC
+CUDA_LIB
FLAGS_NO_CHECK
FLAGS_CHECK
MPICC
@@ -665,6 +711,8 @@
PYTHON_PREFIX
PYTHON_VERSION
PYTHON
+COND_CUDA_FALSE
+COND_CUDA_TRUE
CUSTOM_MPI_TYPE
CUSTOM_REAL
COND_MPI_FALSE
@@ -701,7 +749,6 @@
program_transform_name
prefix
exec_prefix
-PACKAGE_URL
PACKAGE_BUGREPORT
PACKAGE_STRING
PACKAGE_VERSION
@@ -715,6 +762,7 @@
with_pyre
with_mpi
enable_double_precision
+with_cuda
with_scotch_dir
with_scotch_includedir
with_scotch_libdir
@@ -741,6 +789,9 @@
MPICC
FLAGS_CHECK
FLAGS_NO_CHECK
+CUDA_LIB
+CUDA_INC
+MPI_INC
LOCAL_PATH_IS_ALSO_GLOBAL
PYTHON
PYTHONPATH'
@@ -852,7 +903,8 @@
ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'`
# Reject names that are not valid shell variable names.
expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
- as_fn_error $? "invalid feature name: $ac_useropt"
+ { $as_echo "$as_me: error: invalid feature name: $ac_useropt" >&2
+ { (exit 1); exit 1; }; }
ac_useropt_orig=$ac_useropt
ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
case $ac_user_opts in
@@ -878,7 +930,8 @@
ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'`
# Reject names that are not valid shell variable names.
expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
- as_fn_error $? "invalid feature name: $ac_useropt"
+ { $as_echo "$as_me: error: invalid feature name: $ac_useropt" >&2
+ { (exit 1); exit 1; }; }
ac_useropt_orig=$ac_useropt
ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
case $ac_user_opts in
@@ -1082,7 +1135,8 @@
ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'`
# Reject names that are not valid shell variable names.
expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
- as_fn_error $? "invalid package name: $ac_useropt"
+ { $as_echo "$as_me: error: invalid package name: $ac_useropt" >&2
+ { (exit 1); exit 1; }; }
ac_useropt_orig=$ac_useropt
ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
case $ac_user_opts in
@@ -1098,7 +1152,8 @@
ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'`
# Reject names that are not valid shell variable names.
expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
- as_fn_error $? "invalid package name: $ac_useropt"
+ { $as_echo "$as_me: error: invalid package name: $ac_useropt" >&2
+ { (exit 1); exit 1; }; }
ac_useropt_orig=$ac_useropt
ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
case $ac_user_opts in
@@ -1128,17 +1183,17 @@
| --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
x_libraries=$ac_optarg ;;
- -*) as_fn_error $? "unrecognized option: \`$ac_option'
-Try \`$0 --help' for more information"
+ -*) { $as_echo "$as_me: error: unrecognized option: $ac_option
+Try \`$0 --help' for more information." >&2
+ { (exit 1); exit 1; }; }
;;
*=*)
ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='`
# Reject names that are not valid shell variable names.
- case $ac_envvar in #(
- '' | [0-9]* | *[!_$as_cr_alnum]* )
- as_fn_error $? "invalid variable name: \`$ac_envvar'" ;;
- esac
+ expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null &&
+ { $as_echo "$as_me: error: invalid variable name: $ac_envvar" >&2
+ { (exit 1); exit 1; }; }
eval $ac_envvar=\$ac_optarg
export $ac_envvar ;;
@@ -1155,13 +1210,15 @@
if test -n "$ac_prev"; then
ac_option=--`echo $ac_prev | sed 's/_/-/g'`
- as_fn_error $? "missing argument to $ac_option"
+ { $as_echo "$as_me: error: missing argument to $ac_option" >&2
+ { (exit 1); exit 1; }; }
fi
if test -n "$ac_unrecognized_opts"; then
case $enable_option_checking in
no) ;;
- fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;;
+ fatal) { $as_echo "$as_me: error: unrecognized options: $ac_unrecognized_opts" >&2
+ { (exit 1); exit 1; }; } ;;
*) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;;
esac
fi
@@ -1184,7 +1241,8 @@
[\\/$]* | ?:[\\/]* ) continue;;
NONE | '' ) case $ac_var in *prefix ) continue;; esac;;
esac
- as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val"
+ { $as_echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2
+ { (exit 1); exit 1; }; }
done
# There might be people who depend on the old broken behavior: `$host'
@@ -1198,8 +1256,8 @@
if test "x$host_alias" != x; then
if test "x$build_alias" = x; then
cross_compiling=maybe
- $as_echo "$as_me: WARNING: if you wanted to set the --build type, don't use --host.
- If a cross compiler is detected then cross compile mode will be used" >&2
+ $as_echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host.
+ If a cross compiler is detected then cross compile mode will be used." >&2
elif test "x$build_alias" != "x$host_alias"; then
cross_compiling=yes
fi
@@ -1214,9 +1272,11 @@
ac_pwd=`pwd` && test -n "$ac_pwd" &&
ac_ls_di=`ls -di .` &&
ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` ||
- as_fn_error $? "working directory cannot be determined"
+ { $as_echo "$as_me: error: working directory cannot be determined" >&2
+ { (exit 1); exit 1; }; }
test "X$ac_ls_di" = "X$ac_pwd_ls_di" ||
- as_fn_error $? "pwd does not report name of working directory"
+ { $as_echo "$as_me: error: pwd does not report name of working directory" >&2
+ { (exit 1); exit 1; }; }
# Find the source files, if location was not specified.
@@ -1255,11 +1315,13 @@
fi
if test ! -r "$srcdir/$ac_unique_file"; then
test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .."
- as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir"
+ { $as_echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2
+ { (exit 1); exit 1; }; }
fi
ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work"
ac_abs_confdir=`(
- cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg"
+ cd "$srcdir" && test -r "./$ac_unique_file" || { $as_echo "$as_me: error: $ac_msg" >&2
+ { (exit 1); exit 1; }; }
pwd)`
# When building in place, set srcdir=.
if test "$ac_abs_confdir" = "$ac_pwd"; then
@@ -1299,7 +1361,7 @@
--help=short display options specific to this package
--help=recursive display the short help of all the included packages
-V, --version display version information and exit
- -q, --quiet, --silent do not print \`checking ...' messages
+ -q, --quiet, --silent do not print \`checking...' messages
--cache-file=FILE cache test results in FILE [disabled]
-C, --config-cache alias for \`--cache-file=config.cache'
-n, --no-create do not create output files
@@ -1366,6 +1428,7 @@
--without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no)
--with-pyre build Pyrized version [default=no]
--with-mpi build parallel version [default=yes]
+ --with-cuda build cuda GPU enabled version [default=no]
--with-scotch-dir define the root path to Scotch (e.g. /opt/scotch/)
--with-scotch-includedir
define the path to the Scotch headers (e.g.
@@ -1381,7 +1444,7 @@
LIBS libraries to pass to the linker, e.g. -l<library>
CC C compiler command
CFLAGS C compiler flags
- CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I<include dir> if
+ CPPFLAGS C/C++/Objective C preprocessor flags, e.g. -I<include dir> if
you have headers in a nonstandard directory <include dir>
CPP C preprocessor
YACC The `Yet Another C Compiler' implementation to use. Defaults to
@@ -1403,6 +1466,10 @@
FLAGS_NO_CHECK
Fortran compiler flags for creating fast, production-run code
for critical subroutines
+ CUDA_LIB Location of CUDA library libcudart
+ CUDA_INC Location of CUDA include files
+ MPI_INC Location of MPI include mpi.h, which is needed by nvcc when
+ compiling cuda files
LOCAL_PATH_IS_ALSO_GLOBAL
files on a local path on each node are also seen as global with
same path [default=true]
@@ -1476,393 +1543,21 @@
if $ac_init_version; then
cat <<\_ACEOF
Specfem 3D configure 2.0.1
-generated by GNU Autoconf 2.66
+generated by GNU Autoconf 2.63
-Copyright (C) 2010 Free Software Foundation, Inc.
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
+2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
This configure script is free software; the Free Software Foundation
gives unlimited permission to copy, distribute and modify it.
_ACEOF
exit
fi
-
-## ------------------------ ##
-## Autoconf initialization. ##
-## ------------------------ ##
-
-# ac_fn_fc_try_compile LINENO
-# ---------------------------
-# Try to compile conftest.$ac_ext, and return whether this succeeded.
-ac_fn_fc_try_compile ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- rm -f conftest.$ac_objext
- if { { ac_try="$ac_compile"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_compile") 2>conftest.err
- ac_status=$?
- if test -s conftest.err; then
- grep -v '^ *+' conftest.err >conftest.er1
- cat conftest.er1 >&5
- mv -f conftest.er1 conftest.err
- fi
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; } && {
- test -z "$ac_fc_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then :
- ac_retval=0
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- ac_retval=1
-fi
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
- as_fn_set_status $ac_retval
-
-} # ac_fn_fc_try_compile
-
-# ac_fn_c_try_compile LINENO
-# --------------------------
-# Try to compile conftest.$ac_ext, and return whether this succeeded.
-ac_fn_c_try_compile ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- rm -f conftest.$ac_objext
- if { { ac_try="$ac_compile"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_compile") 2>conftest.err
- ac_status=$?
- if test -s conftest.err; then
- grep -v '^ *+' conftest.err >conftest.er1
- cat conftest.er1 >&5
- mv -f conftest.er1 conftest.err
- fi
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then :
- ac_retval=0
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- ac_retval=1
-fi
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
- as_fn_set_status $ac_retval
-
-} # ac_fn_c_try_compile
-
-# ac_fn_c_try_link LINENO
-# -----------------------
-# Try to link conftest.$ac_ext, and return whether this succeeded.
-ac_fn_c_try_link ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- rm -f conftest.$ac_objext conftest$ac_exeext
- if { { ac_try="$ac_link"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_link") 2>conftest.err
- ac_status=$?
- if test -s conftest.err; then
- grep -v '^ *+' conftest.err >conftest.er1
- cat conftest.er1 >&5
- mv -f conftest.er1 conftest.err
- fi
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext && {
- test "$cross_compiling" = yes ||
- $as_test_x conftest$ac_exeext
- }; then :
- ac_retval=0
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- ac_retval=1
-fi
- # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information
- # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would
- # interfere with the next link command; also delete a directory that is
- # left behind by Apple's compiler. We do this before executing the actions.
- rm -rf conftest.dSYM conftest_ipa8_conftest.oo
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
- as_fn_set_status $ac_retval
-
-} # ac_fn_c_try_link
-
-# ac_fn_c_try_cpp LINENO
-# ----------------------
-# Try to preprocess conftest.$ac_ext, and return whether this succeeded.
-ac_fn_c_try_cpp ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- if { { ac_try="$ac_cpp conftest.$ac_ext"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err
- ac_status=$?
- if test -s conftest.err; then
- grep -v '^ *+' conftest.err >conftest.er1
- cat conftest.er1 >&5
- mv -f conftest.er1 conftest.err
- fi
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; } >/dev/null && {
- test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
- test ! -s conftest.err
- }; then :
- ac_retval=0
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- ac_retval=1
-fi
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
- as_fn_set_status $ac_retval
-
-} # ac_fn_c_try_cpp
-
-# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES
-# -------------------------------------------------------
-# Tests whether HEADER exists, giving a warning if it cannot be compiled using
-# the include files in INCLUDES and setting the cache variable VAR
-# accordingly.
-ac_fn_c_check_header_mongrel ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- if eval "test \"\${$3+set}\"" = set; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
-$as_echo_n "checking for $2... " >&6; }
-if eval "test \"\${$3+set}\"" = set; then :
- $as_echo_n "(cached) " >&6
-fi
-eval ac_res=\$$3
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
-$as_echo "$ac_res" >&6; }
-else
- # Is the header compilable?
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5
-$as_echo_n "checking $2 usability... " >&6; }
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-$4
-#include <$2>
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
- ac_header_compiler=yes
-else
- ac_header_compiler=no
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5
-$as_echo "$ac_header_compiler" >&6; }
-
-# Is the header present?
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5
-$as_echo_n "checking $2 presence... " >&6; }
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <$2>
-_ACEOF
-if ac_fn_c_try_cpp "$LINENO"; then :
- ac_header_preproc=yes
-else
- ac_header_preproc=no
-fi
-rm -f conftest.err conftest.$ac_ext
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
-$as_echo "$ac_header_preproc" >&6; }
-
-# So? What about this header?
-case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #((
- yes:no: )
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5
-$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;}
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
-$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
- ;;
- no:yes:* )
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5
-$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;}
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5
-$as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" >&2;}
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5
-$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;}
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5
-$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;}
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
-$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
-( $as_echo "## -------------------------------------- ##
-## Report this to jtromp AT princeton.edu ##
-## -------------------------------------- ##"
- ) | sed "s/^/$as_me: WARNING: /" >&2
- ;;
-esac
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
-$as_echo_n "checking for $2... " >&6; }
-if eval "test \"\${$3+set}\"" = set; then :
- $as_echo_n "(cached) " >&6
-else
- eval "$3=\$ac_header_compiler"
-fi
-eval ac_res=\$$3
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
-$as_echo "$ac_res" >&6; }
-fi
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
-
-} # ac_fn_c_check_header_mongrel
-
-# ac_fn_c_try_run LINENO
-# ----------------------
-# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes
-# that executables *can* be run.
-ac_fn_c_try_run ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- if { { ac_try="$ac_link"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_link") 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; } && { ac_try='./conftest$ac_exeext'
- { { case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_try") 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; }; then :
- ac_retval=0
-else
- $as_echo "$as_me: program exited with status $ac_status" >&5
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- ac_retval=$ac_status
-fi
- rm -rf conftest.dSYM conftest_ipa8_conftest.oo
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
- as_fn_set_status $ac_retval
-
-} # ac_fn_c_try_run
-
-# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES
-# -------------------------------------------------------
-# Tests whether HEADER exists and can be compiled using the include files in
-# INCLUDES, setting the cache variable VAR accordingly.
-ac_fn_c_check_header_compile ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
-$as_echo_n "checking for $2... " >&6; }
-if eval "test \"\${$3+set}\"" = set; then :
- $as_echo_n "(cached) " >&6
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-$4
-#include <$2>
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
- eval "$3=yes"
-else
- eval "$3=no"
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-fi
-eval ac_res=\$$3
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
-$as_echo "$ac_res" >&6; }
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
-
-} # ac_fn_c_check_header_compile
-
-# ac_fn_fc_try_link LINENO
-# ------------------------
-# Try to link conftest.$ac_ext, and return whether this succeeded.
-ac_fn_fc_try_link ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- rm -f conftest.$ac_objext conftest$ac_exeext
- if { { ac_try="$ac_link"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_link") 2>conftest.err
- ac_status=$?
- if test -s conftest.err; then
- grep -v '^ *+' conftest.err >conftest.er1
- cat conftest.er1 >&5
- mv -f conftest.er1 conftest.err
- fi
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; } && {
- test -z "$ac_fc_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext && {
- test "$cross_compiling" = yes ||
- $as_test_x conftest$ac_exeext
- }; then :
- ac_retval=0
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- ac_retval=1
-fi
- # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information
- # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would
- # interfere with the next link command; also delete a directory that is
- # left behind by Apple's compiler. We do this before executing the actions.
- rm -rf conftest.dSYM conftest_ipa8_conftest.oo
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
- as_fn_set_status $ac_retval
-
-} # ac_fn_fc_try_link
cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by Specfem 3D $as_me 2.0.1, which was
-generated by GNU Autoconf 2.66. Invocation command line was
+generated by GNU Autoconf 2.63. Invocation command line was
$ $0 $@
@@ -1898,8 +1593,8 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- $as_echo "PATH: $as_dir"
- done
+ $as_echo "PATH: $as_dir"
+done
IFS=$as_save_IFS
} >&5
@@ -1936,9 +1631,9 @@
ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;;
esac
case $ac_pass in
- 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;;
+ 1) ac_configure_args0="$ac_configure_args0 '$ac_arg'" ;;
2)
- as_fn_append ac_configure_args1 " '$ac_arg'"
+ ac_configure_args1="$ac_configure_args1 '$ac_arg'"
if test $ac_must_keep_next = true; then
ac_must_keep_next=false # Got value, back to normal.
else
@@ -1954,13 +1649,13 @@
-* ) ac_must_keep_next=true ;;
esac
fi
- as_fn_append ac_configure_args " '$ac_arg'"
+ ac_configure_args="$ac_configure_args '$ac_arg'"
;;
esac
done
done
-{ ac_configure_args0=; unset ac_configure_args0;}
-{ ac_configure_args1=; unset ac_configure_args1;}
+$as_unset ac_configure_args0 || test "${ac_configure_args0+set}" != set || { ac_configure_args0=; export ac_configure_args0; }
+$as_unset ac_configure_args1 || test "${ac_configure_args1+set}" != set || { ac_configure_args1=; export ac_configure_args1; }
# When interrupted or exit'd, cleanup temporary files, and complete
# config.log. We remove comments because anyway the quotes in there
@@ -1972,9 +1667,11 @@
{
echo
- $as_echo "## ---------------- ##
+ cat <<\_ASBOX
+## ---------------- ##
## Cache variables. ##
-## ---------------- ##"
+## ---------------- ##
+_ASBOX
echo
# The following way of writing the cache mishandles newlines in values,
(
@@ -1983,13 +1680,13 @@
case $ac_val in #(
*${as_nl}*)
case $ac_var in #(
- *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5
+ *_cv_*) { $as_echo "$as_me:$LINENO: WARNING: cache variable $ac_var contains a newline" >&5
$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;;
esac
case $ac_var in #(
_ | IFS | as_nl) ;; #(
BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #(
- *) { eval $ac_var=; unset $ac_var;} ;;
+ *) $as_unset $ac_var ;;
esac ;;
esac
done
@@ -2008,9 +1705,11 @@
)
echo
- $as_echo "## ----------------- ##
+ cat <<\_ASBOX
+## ----------------- ##
## Output variables. ##
-## ----------------- ##"
+## ----------------- ##
+_ASBOX
echo
for ac_var in $ac_subst_vars
do
@@ -2023,9 +1722,11 @@
echo
if test -n "$ac_subst_files"; then
- $as_echo "## ------------------- ##
+ cat <<\_ASBOX
+## ------------------- ##
## File substitutions. ##
-## ------------------- ##"
+## ------------------- ##
+_ASBOX
echo
for ac_var in $ac_subst_files
do
@@ -2039,9 +1740,11 @@
fi
if test -s confdefs.h; then
- $as_echo "## ----------- ##
+ cat <<\_ASBOX
+## ----------- ##
## confdefs.h. ##
-## ----------- ##"
+## ----------- ##
+_ASBOX
echo
cat confdefs.h
echo
@@ -2055,53 +1758,46 @@
exit $exit_status
' 0
for ac_signal in 1 2 13 15; do
- trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal
+ trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal
done
ac_signal=0
# confdefs.h avoids OS command line length limits that DEFS can exceed.
rm -f -r conftest* confdefs.h
-$as_echo "/* confdefs.h */" > confdefs.h
-
# Predefined preprocessor variables.
cat >>confdefs.h <<_ACEOF
#define PACKAGE_NAME "$PACKAGE_NAME"
_ACEOF
+
cat >>confdefs.h <<_ACEOF
#define PACKAGE_TARNAME "$PACKAGE_TARNAME"
_ACEOF
+
cat >>confdefs.h <<_ACEOF
#define PACKAGE_VERSION "$PACKAGE_VERSION"
_ACEOF
+
cat >>confdefs.h <<_ACEOF
#define PACKAGE_STRING "$PACKAGE_STRING"
_ACEOF
+
cat >>confdefs.h <<_ACEOF
#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT"
_ACEOF
-cat >>confdefs.h <<_ACEOF
-#define PACKAGE_URL "$PACKAGE_URL"
-_ACEOF
-
# Let the site file select an alternate cache file if it wants to.
# Prefer an explicitly selected file to automatically selected ones.
ac_site_file1=NONE
ac_site_file2=NONE
if test -n "$CONFIG_SITE"; then
- # We do not want a PATH search for config.site.
- case $CONFIG_SITE in #((
- -*) ac_site_file1=./$CONFIG_SITE;;
- */*) ac_site_file1=$CONFIG_SITE;;
- *) ac_site_file1=./$CONFIG_SITE;;
- esac
+ ac_site_file1=$CONFIG_SITE
elif test "x$prefix" != xNONE; then
ac_site_file1=$prefix/share/config.site
ac_site_file2=$prefix/etc/config.site
@@ -2112,23 +1808,19 @@
for ac_site_file in "$ac_site_file1" "$ac_site_file2"
do
test "x$ac_site_file" = xNONE && continue
- if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5
+ if test -r "$ac_site_file"; then
+ { $as_echo "$as_me:$LINENO: loading site script $ac_site_file" >&5
$as_echo "$as_me: loading site script $ac_site_file" >&6;}
sed 's/^/| /' "$ac_site_file" >&5
- . "$ac_site_file" \
- || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "failed to load site script $ac_site_file
-See \`config.log' for more details" "$LINENO" 5; }
+ . "$ac_site_file"
fi
done
if test -r "$cache_file"; then
- # Some versions of bash will fail to source /dev/null (special files
- # actually), so we avoid doing that. DJGPP emulates it as a regular file.
- if test /dev/null != "$cache_file" && test -f "$cache_file"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5
+ # Some versions of bash will fail to source /dev/null (special
+ # files actually), so we avoid doing that.
+ if test -f "$cache_file"; then
+ { $as_echo "$as_me:$LINENO: loading cache $cache_file" >&5
$as_echo "$as_me: loading cache $cache_file" >&6;}
case $cache_file in
[\\/]* | ?:[\\/]* ) . "$cache_file";;
@@ -2136,7 +1828,7 @@
esac
fi
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5
+ { $as_echo "$as_me:$LINENO: creating cache $cache_file" >&5
$as_echo "$as_me: creating cache $cache_file" >&6;}
>$cache_file
fi
@@ -2151,11 +1843,11 @@
eval ac_new_val=\$ac_env_${ac_var}_value
case $ac_old_set,$ac_new_set in
set,)
- { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5
+ { $as_echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5
$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;}
ac_cache_corrupted=: ;;
,set)
- { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5
+ { $as_echo "$as_me:$LINENO: error: \`$ac_var' was not set in the previous run" >&5
$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;}
ac_cache_corrupted=: ;;
,);;
@@ -2165,17 +1857,17 @@
ac_old_val_w=`echo x $ac_old_val`
ac_new_val_w=`echo x $ac_new_val`
if test "$ac_old_val_w" != "$ac_new_val_w"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5
+ { $as_echo "$as_me:$LINENO: error: \`$ac_var' has changed since the previous run:" >&5
$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;}
ac_cache_corrupted=:
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5
+ { $as_echo "$as_me:$LINENO: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5
$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;}
eval $ac_var=\$ac_old_val
fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5
+ { $as_echo "$as_me:$LINENO: former value: \`$ac_old_val'" >&5
$as_echo "$as_me: former value: \`$ac_old_val'" >&2;}
- { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5
+ { $as_echo "$as_me:$LINENO: current value: \`$ac_new_val'" >&5
$as_echo "$as_me: current value: \`$ac_new_val'" >&2;}
fi;;
esac
@@ -2187,21 +1879,44 @@
esac
case " $ac_configure_args " in
*" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy.
- *) as_fn_append ac_configure_args " '$ac_arg'" ;;
+ *) ac_configure_args="$ac_configure_args '$ac_arg'" ;;
esac
fi
done
if $ac_cache_corrupted; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+ { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
- { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5
+ { $as_echo "$as_me:$LINENO: error: changes in the environment can compromise the build" >&5
$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;}
- as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&5
+$as_echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&2;}
+ { (exit 1); exit 1; }; }
fi
-## -------------------- ##
-## Main body of script. ##
-## -------------------- ##
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
@@ -2219,7 +1934,7 @@
# Check whether --with-pyre was given.
-if test "${with_pyre+set}" = set; then :
+if test "${with_pyre+set}" = set; then
withval=$with_pyre; want_pyre="$withval"
else
want_pyre=no
@@ -2236,7 +1951,7 @@
# Check whether --with-mpi was given.
-if test "${with_mpi+set}" = set; then :
+if test "${with_mpi+set}" = set; then
withval=$with_mpi; want_mpi="$withval"
else
want_mpi=yes
@@ -2252,7 +1967,7 @@
# Check whether --enable-double-precision was given.
-if test "${enable_double_precision+set}" = set; then :
+if test "${enable_double_precision+set}" = set; then
enableval=$enable_double_precision; want_double_precision="$enableval"
else
want_double_precision=no
@@ -2269,6 +1984,23 @@
+# Check whether --with-cuda was given.
+if test "${with_cuda+set}" = set; then
+ withval=$with_cuda; want_cuda="$withval"
+else
+ want_cuda=no
+fi
+
+ if test "$want_cuda" = yes; then
+ COND_CUDA_TRUE=
+ COND_CUDA_FALSE='#'
+else
+ COND_CUDA_TRUE='#'
+ COND_CUDA_FALSE=
+fi
+
+
+
# Checks for programs.
if test "$want_pyre" = yes; then
@@ -2278,7 +2010,7 @@
if test -n "$PYTHON"; then
# If the user set $PYTHON, use it and don't search something else.
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $PYTHON version >= 2.3" >&5
+ { $as_echo "$as_me:$LINENO: checking whether $PYTHON version >= 2.3" >&5
$as_echo_n "checking whether $PYTHON version >= 2.3... " >&6; }
prog="import sys
# split strings by '.' and convert to numeric. Append some zeros
@@ -2293,19 +2025,22 @@
($PYTHON -c "$prog") >&5 2>&5
ac_status=$?
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+ (exit $ac_status); }; then
+ { $as_echo "$as_me:$LINENO: result: yes" >&5
$as_echo "yes" >&6; }
else
- as_fn_error $? "too old" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: too old" >&5
+$as_echo "$as_me: error: too old" >&2;}
+ { (exit 1); exit 1; }; }
fi
+
am_display_PYTHON=$PYTHON
else
# Otherwise, try each interpreter until we find one that satisfies
# VERSION.
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for a Python interpreter with version >= 2.3" >&5
+ { $as_echo "$as_me:$LINENO: checking for a Python interpreter with version >= 2.3" >&5
$as_echo_n "checking for a Python interpreter with version >= 2.3... " >&6; }
-if test "${am_cv_pathless_PYTHON+set}" = set; then :
+if test "${am_cv_pathless_PYTHON+set}" = set; then
$as_echo_n "(cached) " >&6
else
@@ -2324,12 +2059,13 @@
($am_cv_pathless_PYTHON -c "$prog") >&5 2>&5
ac_status=$?
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; then :
+ (exit $ac_status); }; then
break
fi
+
done
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_pathless_PYTHON" >&5
+{ $as_echo "$as_me:$LINENO: result: $am_cv_pathless_PYTHON" >&5
$as_echo "$am_cv_pathless_PYTHON" >&6; }
# Set $PYTHON to the absolute path of $am_cv_pathless_PYTHON.
if test "$am_cv_pathless_PYTHON" = none; then
@@ -2337,9 +2073,9 @@
else
# Extract the first word of "$am_cv_pathless_PYTHON", so it can be a program name with args.
set dummy $am_cv_pathless_PYTHON; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_path_PYTHON+set}" = set; then :
+if test "${ac_cv_path_PYTHON+set}" = set; then
$as_echo_n "(cached) " >&6
else
case $PYTHON in
@@ -2352,14 +2088,14 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_PYTHON="$as_dir/$ac_word$ac_exec_ext"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
;;
@@ -2367,10 +2103,10 @@
fi
PYTHON=$ac_cv_path_PYTHON
if test -n "$PYTHON"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $PYTHON" >&5
+ { $as_echo "$as_me:$LINENO: result: $PYTHON" >&5
$as_echo "$PYTHON" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -2381,18 +2117,20 @@
if test "$PYTHON" = :; then
- as_fn_error $? "no suitable Python interpreter found" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: no suitable Python interpreter found" >&5
+$as_echo "$as_me: error: no suitable Python interpreter found" >&2;}
+ { (exit 1); exit 1; }; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $am_display_PYTHON version" >&5
+ { $as_echo "$as_me:$LINENO: checking for $am_display_PYTHON version" >&5
$as_echo_n "checking for $am_display_PYTHON version... " >&6; }
-if test "${am_cv_python_version+set}" = set; then :
+if test "${am_cv_python_version+set}" = set; then
$as_echo_n "(cached) " >&6
else
am_cv_python_version=`$PYTHON -c "import sys; sys.stdout.write(sys.version[:3])"`
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_python_version" >&5
+{ $as_echo "$as_me:$LINENO: result: $am_cv_python_version" >&5
$as_echo "$am_cv_python_version" >&6; }
PYTHON_VERSION=$am_cv_python_version
@@ -2404,23 +2142,23 @@
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $am_display_PYTHON platform" >&5
+ { $as_echo "$as_me:$LINENO: checking for $am_display_PYTHON platform" >&5
$as_echo_n "checking for $am_display_PYTHON platform... " >&6; }
-if test "${am_cv_python_platform+set}" = set; then :
+if test "${am_cv_python_platform+set}" = set; then
$as_echo_n "(cached) " >&6
else
am_cv_python_platform=`$PYTHON -c "import sys; sys.stdout.write(sys.platform)"`
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_python_platform" >&5
+{ $as_echo "$as_me:$LINENO: result: $am_cv_python_platform" >&5
$as_echo "$am_cv_python_platform" >&6; }
PYTHON_PLATFORM=$am_cv_python_platform
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $am_display_PYTHON script directory" >&5
+ { $as_echo "$as_me:$LINENO: checking for $am_display_PYTHON script directory" >&5
$as_echo_n "checking for $am_display_PYTHON script directory... " >&6; }
-if test "${am_cv_python_pythondir+set}" = set; then :
+if test "${am_cv_python_pythondir+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test "x$prefix" = xNONE
@@ -2447,7 +2185,7 @@
esac
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_python_pythondir" >&5
+{ $as_echo "$as_me:$LINENO: result: $am_cv_python_pythondir" >&5
$as_echo "$am_cv_python_pythondir" >&6; }
pythondir=$am_cv_python_pythondir
@@ -2456,9 +2194,9 @@
pkgpythondir=\${pythondir}/$PACKAGE
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $am_display_PYTHON extension module directory" >&5
+ { $as_echo "$as_me:$LINENO: checking for $am_display_PYTHON extension module directory" >&5
$as_echo_n "checking for $am_display_PYTHON extension module directory... " >&6; }
-if test "${am_cv_python_pyexecdir+set}" = set; then :
+if test "${am_cv_python_pyexecdir+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test "x$exec_prefix" = xNONE
@@ -2485,7 +2223,7 @@
esac
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_python_pyexecdir" >&5
+{ $as_echo "$as_me:$LINENO: result: $am_cv_python_pyexecdir" >&5
$as_echo "$am_cv_python_pyexecdir" >&6; }
pyexecdir=$am_cv_python_pyexecdir
@@ -2501,7 +2239,7 @@
# $Id: cit_python.m4 18460 2011-05-25 14:52:04Z brad $
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $am_display_PYTHON sysconfig" >&5
+{ $as_echo "$as_me:$LINENO: checking $am_display_PYTHON sysconfig" >&5
$as_echo_n "checking $am_display_PYTHON sysconfig... " >&6; }
cat >sysconfig.py <<END_OF_PYTHON
import os, sys
@@ -2582,13 +2320,18 @@
END_OF_PYTHON
eval `$PYTHON sysconfig.py 2>/dev/null`
if test -n "$PYTHON_INCDIR"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: ok" >&5
+ { $as_echo "$as_me:$LINENO: result: ok" >&5
$as_echo "ok" >&6; }
else
- as_fn_error $? "\"failed
+ { { $as_echo "$as_me:$LINENO: error: \"failed
Run '$PYTHON sysconfig.py' to see what went wrong.
-\"" "$LINENO" 5
+\"" >&5
+$as_echo "$as_me: error: \"failed
+
+Run '$PYTHON sysconfig.py' to see what went wrong.
+\"" >&2;}
+ { (exit 1); exit 1; }; }
fi
rm -f sysconfig.py
PYTHON_INCDIR=$PYTHON_INCDIR
@@ -2633,13 +2376,13 @@
ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_fc_compiler_gnu
if test -n "$ac_tool_prefix"; then
- for ac_prog in gfortran g95 xlf95 f95 fort ifort ifc efc pgfortran pgf95 lf95 ftn xlf90 f90 pgf90 pghpf epcf90 g77 xlf f77 frt pgf77 cf77 fort77 fl32 af77
+ for ac_prog in gfortran g95 xlf95 f95 fort ifort ifc efc pgf95 lf95 ftn xlf90 f90 pgf90 pghpf epcf90 g77 xlf f77 frt pgf77 cf77 fort77 fl32 af77
do
# Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
set dummy $ac_tool_prefix$ac_prog; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_FC+set}" = set; then :
+if test "${ac_cv_prog_FC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$FC"; then
@@ -2650,24 +2393,24 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_FC="$ac_tool_prefix$ac_prog"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
fi
fi
FC=$ac_cv_prog_FC
if test -n "$FC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $FC" >&5
+ { $as_echo "$as_me:$LINENO: result: $FC" >&5
$as_echo "$FC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -2677,13 +2420,13 @@
fi
if test -z "$FC"; then
ac_ct_FC=$FC
- for ac_prog in gfortran g95 xlf95 f95 fort ifort ifc efc pgfortran pgf95 lf95 ftn xlf90 f90 pgf90 pghpf epcf90 g77 xlf f77 frt pgf77 cf77 fort77 fl32 af77
+ for ac_prog in gfortran g95 xlf95 f95 fort ifort ifc efc pgf95 lf95 ftn xlf90 f90 pgf90 pghpf epcf90 g77 xlf f77 frt pgf77 cf77 fort77 fl32 af77
do
# Extract the first word of "$ac_prog", so it can be a program name with args.
set dummy $ac_prog; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_FC+set}" = set; then :
+if test "${ac_cv_prog_ac_ct_FC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_FC"; then
@@ -2694,24 +2437,24 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_ac_ct_FC="$ac_prog"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
fi
fi
ac_ct_FC=$ac_cv_prog_ac_ct_FC
if test -n "$ac_ct_FC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_FC" >&5
+ { $as_echo "$as_me:$LINENO: result: $ac_ct_FC" >&5
$as_echo "$ac_ct_FC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -2724,7 +2467,7 @@
else
case $cross_compiling:$ac_tool_warned in
yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+{ $as_echo "$as_me:$LINENO: WARNING: using cross tools not prefixed with host triplet" >&5
$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
ac_tool_warned=yes ;;
esac
@@ -2734,32 +2477,45 @@
# Provide some information about the compiler.
-$as_echo "$as_me:${as_lineno-$LINENO}: checking for Fortran compiler version" >&5
+$as_echo "$as_me:$LINENO: checking for Fortran compiler version" >&5
set X $ac_compile
ac_compiler=$2
-for ac_option in --version -v -V -qversion; do
- { { ac_try="$ac_compiler $ac_option >&5"
+{ (ac_try="$ac_compiler --version >&5"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_compiler $ac_option >&5") 2>conftest.err
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compiler --version >&5") 2>&5
ac_status=$?
- if test -s conftest.err; then
- sed '10a\
-... rest of stderr output deleted ...
- 10q' conftest.err >conftest.er1
- cat conftest.er1 >&5
- fi
- rm -f conftest.er1 conftest.err
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }
-done
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+{ (ac_try="$ac_compiler -v >&5"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compiler -v >&5") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+{ (ac_try="$ac_compiler -V >&5"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compiler -V >&5") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
rm -f a.out
-cat > conftest.$ac_ext <<_ACEOF
+cat >conftest.$ac_ext <<_ACEOF
program main
end
@@ -2769,8 +2525,8 @@
# Try to create an executable without -o first, disregard a.out.
# It will help us diagnose broken compilers, and finding out an intuition
# of exeext.
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the Fortran compiler works" >&5
-$as_echo_n "checking whether the Fortran compiler works... " >&6; }
+{ $as_echo "$as_me:$LINENO: checking for Fortran compiler default output file name" >&5
+$as_echo_n "checking for Fortran compiler default output file name... " >&6; }
ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'`
# The possible output files:
@@ -2786,17 +2542,17 @@
done
rm -f $ac_rmfiles
-if { { ac_try="$ac_link_default"
+if { (ac_try="$ac_link_default"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
(eval "$ac_link_default") 2>&5
ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; then :
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; then
# Autoconf-2.13 could set the ac_cv_exeext variable to `no'.
# So ignore a value of `no', otherwise this would lead to `EXEEXT = no'
# in a Makefile. We should not override ac_cv_exeext if it was cached,
@@ -2813,7 +2569,7 @@
# certainly right.
break;;
*.* )
- if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no;
+ if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no;
then :; else
ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
fi
@@ -2832,41 +2588,84 @@
else
ac_file=''
fi
-if test -z "$ac_file"; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-$as_echo "$as_me: failed program was:" >&5
+
+{ $as_echo "$as_me:$LINENO: result: $ac_file" >&5
+$as_echo "$ac_file" >&6; }
+if test -z "$ac_file"; then
+ $as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
-{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+{ { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error 77 "Fortran compiler cannot create executables
-See \`config.log' for more details" "$LINENO" 5; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
+{ { $as_echo "$as_me:$LINENO: error: Fortran compiler cannot create executables
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: Fortran compiler cannot create executables
+See \`config.log' for more details." >&2;}
+ { (exit 77); exit 77; }; }; }
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for Fortran compiler default output file name" >&5
-$as_echo_n "checking for Fortran compiler default output file name... " >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5
-$as_echo "$ac_file" >&6; }
+
ac_exeext=$ac_cv_exeext
+# Check that the compiler produces executables we can run. If not, either
+# the compiler is broken, or we cross compile.
+{ $as_echo "$as_me:$LINENO: checking whether the Fortran compiler works" >&5
+$as_echo_n "checking whether the Fortran compiler works... " >&6; }
+# FIXME: These cross compiler hacks should be removed for Autoconf 3.0
+# If not cross compiling, check that we can run a simple program.
+if test "$cross_compiling" != yes; then
+ if { ac_try='./$ac_file'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ cross_compiling=no
+ else
+ if test "$cross_compiling" = maybe; then
+ cross_compiling=yes
+ else
+ { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+{ { $as_echo "$as_me:$LINENO: error: cannot run Fortran compiled programs.
+If you meant to cross compile, use \`--host'.
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: cannot run Fortran compiled programs.
+If you meant to cross compile, use \`--host'.
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }; }
+ fi
+ fi
+fi
+{ $as_echo "$as_me:$LINENO: result: yes" >&5
+$as_echo "yes" >&6; }
+
rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out
ac_clean_files=$ac_clean_files_save
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5
+# Check that the compiler produces executables we can run. If not, either
+# the compiler is broken, or we cross compile.
+{ $as_echo "$as_me:$LINENO: checking whether we are cross compiling" >&5
+$as_echo_n "checking whether we are cross compiling... " >&6; }
+{ $as_echo "$as_me:$LINENO: result: $cross_compiling" >&5
+$as_echo "$cross_compiling" >&6; }
+
+{ $as_echo "$as_me:$LINENO: checking for suffix of executables" >&5
$as_echo_n "checking for suffix of executables... " >&6; }
-if { { ac_try="$ac_link"
+if { (ac_try="$ac_link"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
(eval "$ac_link") 2>&5
ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; then :
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; then
# If both `conftest.exe' and `conftest' are `present' (well, observable)
# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will
# work properly (i.e., refer to `conftest.exe'), while it won't with
@@ -2881,93 +2680,44 @@
esac
done
else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+ { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "cannot compute suffix of executables: cannot compile and link
-See \`config.log' for more details" "$LINENO" 5; }
+{ { $as_echo "$as_me:$LINENO: error: cannot compute suffix of executables: cannot compile and link
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: cannot compute suffix of executables: cannot compile and link
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }; }
fi
-rm -f conftest conftest$ac_cv_exeext
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5
+
+rm -f conftest$ac_cv_exeext
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5
$as_echo "$ac_cv_exeext" >&6; }
rm -f conftest.$ac_ext
EXEEXT=$ac_cv_exeext
ac_exeext=$EXEEXT
-cat > conftest.$ac_ext <<_ACEOF
- program main
- open(unit=9,file='conftest.out')
- close(unit=9)
-
- end
-_ACEOF
-ac_clean_files="$ac_clean_files conftest.out"
-# Check that the compiler produces executables we can run. If not, either
-# the compiler is broken, or we cross compile.
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5
-$as_echo_n "checking whether we are cross compiling... " >&6; }
-if test "$cross_compiling" != yes; then
- { { ac_try="$ac_link"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_link") 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }
- if { ac_try='./conftest$ac_cv_exeext'
- { { case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_try") 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; }; then
- cross_compiling=no
- else
- if test "$cross_compiling" = maybe; then
- cross_compiling=yes
- else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "cannot run Fortran compiled programs.
-If you meant to cross compile, use \`--host'.
-See \`config.log' for more details" "$LINENO" 5; }
- fi
- fi
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5
-$as_echo "$cross_compiling" >&6; }
-
-rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out
-ac_clean_files=$ac_clean_files_save
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5
+{ $as_echo "$as_me:$LINENO: checking for suffix of object files" >&5
$as_echo_n "checking for suffix of object files... " >&6; }
-if test "${ac_cv_objext+set}" = set; then :
+if test "${ac_cv_objext+set}" = set; then
$as_echo_n "(cached) " >&6
else
- cat > conftest.$ac_ext <<_ACEOF
+ cat >conftest.$ac_ext <<_ACEOF
program main
end
_ACEOF
rm -f conftest.o conftest.obj
-if { { ac_try="$ac_compile"
+if { (ac_try="$ac_compile"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
(eval "$ac_compile") 2>&5
ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; then :
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; then
for ac_file in conftest.o conftest.obj conftest.*; do
test -f "$ac_file" || continue;
case $ac_file in
@@ -2980,14 +2730,18 @@
$as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
-{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+{ { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "cannot compute suffix of object files: cannot compile
-See \`config.log' for more details" "$LINENO" 5; }
+{ { $as_echo "$as_me:$LINENO: error: cannot compute suffix of object files: cannot compile
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: cannot compute suffix of object files: cannot compile
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }; }
fi
+
rm -f conftest.$ac_cv_objext conftest.$ac_ext
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_objext" >&5
$as_echo "$ac_cv_objext" >&6; }
OBJEXT=$ac_cv_objext
ac_objext=$OBJEXT
@@ -2995,12 +2749,12 @@
# input file. (Note that this only needs to work for GNU compilers.)
ac_save_ext=$ac_ext
ac_ext=F
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU Fortran compiler" >&5
+{ $as_echo "$as_me:$LINENO: checking whether we are using the GNU Fortran compiler" >&5
$as_echo_n "checking whether we are using the GNU Fortran compiler... " >&6; }
-if test "${ac_cv_fc_compiler_gnu+set}" = set; then :
+if test "${ac_cv_fc_compiler_gnu+set}" = set; then
$as_echo_n "(cached) " >&6
else
- cat > conftest.$ac_ext <<_ACEOF
+ cat >conftest.$ac_ext <<_ACEOF
program main
#ifndef __GNUC__
choke me
@@ -3008,44 +2762,86 @@
end
_ACEOF
-if ac_fn_fc_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_fc_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
ac_compiler_gnu=yes
else
- ac_compiler_gnu=no
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_compiler_gnu=no
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
ac_cv_fc_compiler_gnu=$ac_compiler_gnu
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_fc_compiler_gnu" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_fc_compiler_gnu" >&5
$as_echo "$ac_cv_fc_compiler_gnu" >&6; }
ac_ext=$ac_save_ext
-ac_test_FCFLAGS=${FCFLAGS+set}
-ac_save_FCFLAGS=$FCFLAGS
+ac_test_FFLAGS=${FCFLAGS+set}
+ac_save_FFLAGS=$FCFLAGS
FCFLAGS=
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $FC accepts -g" >&5
+{ $as_echo "$as_me:$LINENO: checking whether $FC accepts -g" >&5
$as_echo_n "checking whether $FC accepts -g... " >&6; }
-if test "${ac_cv_prog_fc_g+set}" = set; then :
+if test "${ac_cv_prog_fc_g+set}" = set; then
$as_echo_n "(cached) " >&6
else
FCFLAGS=-g
-cat > conftest.$ac_ext <<_ACEOF
+cat >conftest.$ac_ext <<_ACEOF
program main
end
_ACEOF
-if ac_fn_fc_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_fc_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
ac_cv_prog_fc_g=yes
else
- ac_cv_prog_fc_g=no
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_prog_fc_g=no
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_fc_g" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_prog_fc_g" >&5
$as_echo "$ac_cv_prog_fc_g" >&6; }
-if test "$ac_test_FCFLAGS" = set; then
- FCFLAGS=$ac_save_FCFLAGS
+if test "$ac_test_FFLAGS" = set; then
+ FCFLAGS=$ac_save_FFLAGS
elif test $ac_cv_prog_fc_g = yes; then
if test "x$ac_cv_fc_compiler_gnu" = xyes; then
FCFLAGS="-g -O2"
@@ -3074,10 +2870,12 @@
flags_guess="$SHELL flags.guess"
-{ $as_echo "$as_me:${as_lineno-$LINENO}: running $flags_guess" >&5
+{ $as_echo "$as_me:$LINENO: running $flags_guess" >&5
$as_echo "$as_me: running $flags_guess" >&6;}
flags=`$flags_guess` ||
- as_fn_error $? "$flags_guess failed" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: $flags_guess failed" >&5
+$as_echo "$as_me: error: $flags_guess failed" >&2;}
+ { (exit 1); exit 1; }; }
eval $flags
@@ -3085,21 +2883,38 @@
ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_fc_compiler_gnu
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to get verbose linking output from $FC" >&5
+{ $as_echo "$as_me:$LINENO: checking how to get verbose linking output from $FC" >&5
$as_echo_n "checking how to get verbose linking output from $FC... " >&6; }
-if test "${ac_cv_prog_fc_v+set}" = set; then :
+if test "${ac_cv_prog_fc_v+set}" = set; then
$as_echo_n "(cached) " >&6
else
- cat > conftest.$ac_ext <<_ACEOF
+ cat >conftest.$ac_ext <<_ACEOF
program main
end
_ACEOF
-if ac_fn_fc_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_fc_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
ac_cv_prog_fc_v=
# Try some options frequently used verbose output
for ac_verb in -v -verbose --verbose -V -\#\#\#; do
- cat > conftest.$ac_ext <<_ACEOF
+ cat >conftest.$ac_ext <<_ACEOF
program main
end
@@ -3109,17 +2924,17 @@
# 1 to this macro) to the Fortran compiler in order to get
# "verbose" output that we can then parse for the Fortran linker
# flags.
-ac_save_FCFLAGS=$FCFLAGS
+ac_save_FFLAGS=$FCFLAGS
FCFLAGS="$FCFLAGS $ac_verb"
eval "set x $ac_link"
shift
-$as_echo "$as_me:${as_lineno-$LINENO}: $*" >&5
+$as_echo "$as_me:$LINENO: $*" >&5
# gfortran 4.3 outputs lines setting COLLECT_GCC_OPTIONS, COMPILER_PATH,
# LIBRARY_PATH; skip all such settings.
ac_fc_v_output=`eval $ac_link 5>&1 2>&1 |
grep -v 'Driving:' | grep -v "^[_$as_cr_Letters][_$as_cr_alnum]*="`
$as_echo "$ac_fc_v_output" >&5
-FCFLAGS=$ac_save_FCFLAGS
+FCFLAGS=$ac_save_FFLAGS
rm -rf conftest*
@@ -3148,9 +2963,9 @@
# Doubly-quoted arguments were reported for "PGF90/x86 Linux/x86 5.0-2".
*-cmdline\ * | *-ignore\ * | *-def\ *)
ac_fc_v_output=`echo $ac_fc_v_output | sed "\
- s/-cmdline *'[^']*'/ /g; s/-cmdline *\"[^\"]*\"/ /g
- s/-ignore *'[^']*'/ /g; s/-ignore *\"[^\"]*\"/ /g
- s/-def *'[^']*'/ /g; s/-def *\"[^\"]*\"/ /g"` ;;
+ s/-cmdline *'[^']*'/ /g; s/-cmdline *\"[^\"]*\"/ /g
+ s/-ignore *'[^']*'/ /g; s/-ignore *\"[^\"]*\"/ /g
+ s/-def *'[^']*'/ /g; s/-def *\"[^\"]*\"/ /g"` ;;
# If we are using Cray Fortran then delete quotes.
*cft90*)
@@ -3161,35 +2976,39 @@
# look for -l* and *.a constructs in the output
for ac_arg in $ac_fc_v_output; do
case $ac_arg in
- [\\/]*.a | ?:[\\/]*.a | -[lLRu]*)
- ac_cv_prog_fc_v=$ac_verb
- break 2 ;;
+ [\\/]*.a | ?:[\\/]*.a | -[lLRu]*)
+ ac_cv_prog_fc_v=$ac_verb
+ break 2 ;;
esac
done
done
if test -z "$ac_cv_prog_fc_v"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cannot determine how to obtain linking information from $FC" >&5
+ { $as_echo "$as_me:$LINENO: WARNING: cannot determine how to obtain linking information from $FC" >&5
$as_echo "$as_me: WARNING: cannot determine how to obtain linking information from $FC" >&2;}
fi
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: compilation failed" >&5
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:$LINENO: WARNING: compilation failed" >&5
$as_echo "$as_me: WARNING: compilation failed" >&2;}
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_fc_v" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_prog_fc_v" >&5
$as_echo "$ac_cv_prog_fc_v" >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for Fortran libraries of $FC" >&5
+{ $as_echo "$as_me:$LINENO: checking for Fortran libraries of $FC" >&5
$as_echo_n "checking for Fortran libraries of $FC... " >&6; }
-if test "${ac_cv_fc_libs+set}" = set; then :
+if test "${ac_cv_fc_libs+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test "x$FCLIBS" != "x"; then
ac_cv_fc_libs="$FCLIBS" # Let the user override the test.
else
-cat > conftest.$ac_ext <<_ACEOF
+cat >conftest.$ac_ext <<_ACEOF
program main
end
@@ -3199,17 +3018,17 @@
# 1 to this macro) to the Fortran compiler in order to get
# "verbose" output that we can then parse for the Fortran linker
# flags.
-ac_save_FCFLAGS=$FCFLAGS
+ac_save_FFLAGS=$FCFLAGS
FCFLAGS="$FCFLAGS $ac_cv_prog_fc_v"
eval "set x $ac_link"
shift
-$as_echo "$as_me:${as_lineno-$LINENO}: $*" >&5
+$as_echo "$as_me:$LINENO: $*" >&5
# gfortran 4.3 outputs lines setting COLLECT_GCC_OPTIONS, COMPILER_PATH,
# LIBRARY_PATH; skip all such settings.
ac_fc_v_output=`eval $ac_link 5>&1 2>&1 |
grep -v 'Driving:' | grep -v "^[_$as_cr_Letters][_$as_cr_alnum]*="`
$as_echo "$ac_fc_v_output" >&5
-FCFLAGS=$ac_save_FCFLAGS
+FCFLAGS=$ac_save_FFLAGS
rm -rf conftest*
@@ -3238,9 +3057,9 @@
# Doubly-quoted arguments were reported for "PGF90/x86 Linux/x86 5.0-2".
*-cmdline\ * | *-ignore\ * | *-def\ *)
ac_fc_v_output=`echo $ac_fc_v_output | sed "\
- s/-cmdline *'[^']*'/ /g; s/-cmdline *\"[^\"]*\"/ /g
- s/-ignore *'[^']*'/ /g; s/-ignore *\"[^\"]*\"/ /g
- s/-def *'[^']*'/ /g; s/-def *\"[^\"]*\"/ /g"` ;;
+ s/-cmdline *'[^']*'/ /g; s/-cmdline *\"[^\"]*\"/ /g
+ s/-ignore *'[^']*'/ /g; s/-ignore *\"[^\"]*\"/ /g
+ s/-def *'[^']*'/ /g; s/-def *\"[^\"]*\"/ /g"` ;;
# If we are using Cray Fortran then delete quotes.
*cft90*)
@@ -3259,8 +3078,8 @@
shift
ac_arg=$1
case $ac_arg in
- [\\/]*.a | ?:[\\/]*.a)
- ac_exists=false
+ [\\/]*.a | ?:[\\/]*.a)
+ ac_exists=false
for ac_i in $ac_cv_fc_libs; do
if test x"$ac_arg" = x"$ac_i"; then
ac_exists=true
@@ -3268,14 +3087,15 @@
fi
done
- if test x"$ac_exists" = xtrue; then :
-
+ if test x"$ac_exists" = xtrue; then
+ :
else
ac_cv_fc_libs="$ac_cv_fc_libs $ac_arg"
fi
- ;;
- -bI:*)
- ac_exists=false
+
+ ;;
+ -bI:*)
+ ac_exists=false
for ac_i in $ac_cv_fc_libs; do
if test x"$ac_arg" = x"$ac_i"; then
ac_exists=true
@@ -3283,8 +3103,8 @@
fi
done
- if test x"$ac_exists" = xtrue; then :
-
+ if test x"$ac_exists" = xtrue; then
+ :
else
if test "$ac_compiler_gnu" = yes; then
for ac_link_opt in $ac_arg; do
@@ -3294,18 +3114,18 @@
ac_cv_fc_libs="$ac_cv_fc_libs $ac_arg"
fi
fi
- ;;
- # Ignore these flags.
- -lang* | -lcrt*.o | -lc | -lgcc* | -lSystem | -libmil | -little \
- |-LANG:=* | -LIST:* | -LNO:*)
- ;;
- -lkernel32)
- test x"$CYGWIN" != xyes && ac_cv_fc_libs="$ac_cv_fc_libs $ac_arg"
- ;;
- -[LRuYz])
- # These flags, when seen by themselves, take an argument.
- # We remove the space between option and argument and re-iterate
- # unless we find an empty arg or a new option (starting with -)
+
+ ;;
+ # Ignore these flags.
+ -lang* | -lcrt*.o | -lc | -lgcc* | -lSystem | -libmil | -LANG:=* | -LIST:* | -LNO:*)
+ ;;
+ -lkernel32)
+ test x"$CYGWIN" != xyes && ac_cv_fc_libs="$ac_cv_fc_libs $ac_arg"
+ ;;
+ -[LRuYz])
+ # These flags, when seen by themselves, take an argument.
+ # We remove the space between option and argument and re-iterate
+ # unless we find an empty arg or a new option (starting with -)
case $2 in
"" | -*);;
*)
@@ -3314,10 +3134,10 @@
set X $ac_arg "$@"
;;
esac
- ;;
- -YP,*)
- for ac_j in `$as_echo "$ac_arg" | sed -e 's/-YP,/-L/;s/:/ -L/g'`; do
- ac_exists=false
+ ;;
+ -YP,*)
+ for ac_j in `$as_echo "$ac_arg" | sed -e 's/-YP,/-L/;s/:/ -L/g'`; do
+ ac_exists=false
for ac_i in $ac_cv_fc_libs; do
if test x"$ac_j" = x"$ac_i"; then
ac_exists=true
@@ -3325,16 +3145,17 @@
fi
done
- if test x"$ac_exists" = xtrue; then :
-
+ if test x"$ac_exists" = xtrue; then
+ :
else
ac_arg="$ac_arg $ac_j"
- ac_cv_fc_libs="$ac_cv_fc_libs $ac_j"
+ ac_cv_fc_libs="$ac_cv_fc_libs $ac_j"
fi
- done
- ;;
- -[lLR]*)
- ac_exists=false
+
+ done
+ ;;
+ -[lLR]*)
+ ac_exists=false
for ac_i in $ac_cv_fc_libs; do
if test x"$ac_arg" = x"$ac_i"; then
ac_exists=true
@@ -3342,16 +3163,17 @@
fi
done
- if test x"$ac_exists" = xtrue; then :
-
+ if test x"$ac_exists" = xtrue; then
+ :
else
ac_cv_fc_libs="$ac_cv_fc_libs $ac_arg"
fi
- ;;
+
+ ;;
-zallextract*| -zdefaultextract)
ac_cv_fc_libs="$ac_cv_fc_libs $ac_arg"
;;
- # Ignore everything else.
+ # Ignore everything else.
esac
done
# restore positional arguments
@@ -3363,9 +3185,9 @@
case `(uname -sr) 2>/dev/null` in
"SunOS 5"*)
ac_ld_run_path=`$as_echo "$ac_fc_v_output" |
- sed -n 's,^.*LD_RUN_PATH *= *\(/[^ ]*\).*$,-R\1,p'`
+ sed -n 's,^.*LD_RUN_PATH *= *\(/[^ ]*\).*$,-R\1,p'`
test "x$ac_ld_run_path" != x &&
- if test "$ac_compiler_gnu" = yes; then
+ if test "$ac_compiler_gnu" = yes; then
for ac_link_opt in $ac_ld_run_path; do
ac_cv_fc_libs="$ac_cv_fc_libs -Xlinker $ac_link_opt"
done
@@ -3377,7 +3199,7 @@
fi # test "x$[]_AC_LANG_PREFIX[]LIBS" = "x"
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_fc_libs" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_fc_libs" >&5
$as_echo "$ac_cv_fc_libs" >&6; }
FCLIBS="$ac_cv_fc_libs"
@@ -3396,9 +3218,9 @@
if test -n "$ac_tool_prefix"; then
# Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args.
set dummy ${ac_tool_prefix}gcc; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CC+set}" = set; then :
+if test "${ac_cv_prog_CC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$CC"; then
@@ -3409,24 +3231,24 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_CC="${ac_tool_prefix}gcc"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
fi
fi
CC=$ac_cv_prog_CC
if test -n "$CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+ { $as_echo "$as_me:$LINENO: result: $CC" >&5
$as_echo "$CC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -3436,9 +3258,9 @@
ac_ct_CC=$CC
# Extract the first word of "gcc", so it can be a program name with args.
set dummy gcc; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_CC+set}" = set; then :
+if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_CC"; then
@@ -3449,24 +3271,24 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_ac_ct_CC="gcc"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
fi
fi
ac_ct_CC=$ac_cv_prog_ac_ct_CC
if test -n "$ac_ct_CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+ { $as_echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
$as_echo "$ac_ct_CC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -3475,7 +3297,7 @@
else
case $cross_compiling:$ac_tool_warned in
yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+{ $as_echo "$as_me:$LINENO: WARNING: using cross tools not prefixed with host triplet" >&5
$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
ac_tool_warned=yes ;;
esac
@@ -3489,9 +3311,9 @@
if test -n "$ac_tool_prefix"; then
# Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args.
set dummy ${ac_tool_prefix}cc; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CC+set}" = set; then :
+if test "${ac_cv_prog_CC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$CC"; then
@@ -3502,24 +3324,24 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_CC="${ac_tool_prefix}cc"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
fi
fi
CC=$ac_cv_prog_CC
if test -n "$CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+ { $as_echo "$as_me:$LINENO: result: $CC" >&5
$as_echo "$CC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -3529,9 +3351,9 @@
if test -z "$CC"; then
# Extract the first word of "cc", so it can be a program name with args.
set dummy cc; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CC+set}" = set; then :
+if test "${ac_cv_prog_CC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$CC"; then
@@ -3543,18 +3365,18 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
ac_prog_rejected=yes
continue
fi
ac_cv_prog_CC="cc"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
if test $ac_prog_rejected = yes; then
@@ -3573,10 +3395,10 @@
fi
CC=$ac_cv_prog_CC
if test -n "$CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+ { $as_echo "$as_me:$LINENO: result: $CC" >&5
$as_echo "$CC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -3588,9 +3410,9 @@
do
# Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
set dummy $ac_tool_prefix$ac_prog; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CC+set}" = set; then :
+if test "${ac_cv_prog_CC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$CC"; then
@@ -3601,24 +3423,24 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
fi
fi
CC=$ac_cv_prog_CC
if test -n "$CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+ { $as_echo "$as_me:$LINENO: result: $CC" >&5
$as_echo "$CC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -3632,9 +3454,9 @@
do
# Extract the first word of "$ac_prog", so it can be a program name with args.
set dummy $ac_prog; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_CC+set}" = set; then :
+if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_CC"; then
@@ -3645,24 +3467,24 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_ac_ct_CC="$ac_prog"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
fi
fi
ac_ct_CC=$ac_cv_prog_ac_ct_CC
if test -n "$ac_ct_CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+ { $as_echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
$as_echo "$ac_ct_CC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -3675,7 +3497,7 @@
else
case $cross_compiling:$ac_tool_warned in
yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+{ $as_echo "$as_me:$LINENO: WARNING: using cross tools not prefixed with host triplet" >&5
$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
ac_tool_warned=yes ;;
esac
@@ -3686,42 +3508,62 @@
fi
-test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+test -z "$CC" && { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "no acceptable C compiler found in \$PATH
-See \`config.log' for more details" "$LINENO" 5; }
+{ { $as_echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: no acceptable C compiler found in \$PATH
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }; }
# Provide some information about the compiler.
-$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5
+$as_echo "$as_me:$LINENO: checking for C compiler version" >&5
set X $ac_compile
ac_compiler=$2
-for ac_option in --version -v -V -qversion; do
- { { ac_try="$ac_compiler $ac_option >&5"
+{ (ac_try="$ac_compiler --version >&5"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_compiler $ac_option >&5") 2>conftest.err
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compiler --version >&5") 2>&5
ac_status=$?
- if test -s conftest.err; then
- sed '10a\
-... rest of stderr output deleted ...
- 10q' conftest.err >conftest.er1
- cat conftest.er1 >&5
- fi
- rm -f conftest.er1 conftest.err
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }
-done
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+{ (ac_try="$ac_compiler -v >&5"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compiler -v >&5") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+{ (ac_try="$ac_compiler -V >&5"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compiler -V >&5") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5
+{ $as_echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5
$as_echo_n "checking whether we are using the GNU C compiler... " >&6; }
-if test "${ac_cv_c_compiler_gnu+set}" = set; then :
+if test "${ac_cv_c_compiler_gnu+set}" = set; then
$as_echo_n "(cached) " >&6
else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#ifdef FC_DUMMY_MAIN
@@ -3743,16 +3585,37 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
ac_compiler_gnu=yes
else
- ac_compiler_gnu=no
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_compiler_gnu=no
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
ac_cv_c_compiler_gnu=$ac_compiler_gnu
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5
$as_echo "$ac_cv_c_compiler_gnu" >&6; }
if test $ac_compiler_gnu = yes; then
GCC=yes
@@ -3761,16 +3624,20 @@
fi
ac_test_CFLAGS=${CFLAGS+set}
ac_save_CFLAGS=$CFLAGS
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5
+{ $as_echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5
$as_echo_n "checking whether $CC accepts -g... " >&6; }
-if test "${ac_cv_prog_cc_g+set}" = set; then :
+if test "${ac_cv_prog_cc_g+set}" = set; then
$as_echo_n "(cached) " >&6
else
ac_save_c_werror_flag=$ac_c_werror_flag
ac_c_werror_flag=yes
ac_cv_prog_cc_g=no
CFLAGS="-g"
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#ifdef FC_DUMMY_MAIN
@@ -3789,11 +3656,35 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
ac_cv_prog_cc_g=yes
else
- CFLAGS=""
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ CFLAGS=""
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#ifdef FC_DUMMY_MAIN
@@ -3812,12 +3703,36 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ :
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
-else
- ac_c_werror_flag=$ac_save_c_werror_flag
+ ac_c_werror_flag=$ac_save_c_werror_flag
CFLAGS="-g"
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#ifdef FC_DUMMY_MAIN
@@ -3836,17 +3751,42 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
ac_cv_prog_cc_g=yes
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
ac_c_werror_flag=$ac_save_c_werror_flag
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5
$as_echo "$ac_cv_prog_cc_g" >&6; }
if test "$ac_test_CFLAGS" = set; then
CFLAGS=$ac_save_CFLAGS
@@ -3863,14 +3803,18 @@
CFLAGS=
fi
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5
+{ $as_echo "$as_me:$LINENO: checking for $CC option to accept ISO C89" >&5
$as_echo_n "checking for $CC option to accept ISO C89... " >&6; }
-if test "${ac_cv_prog_cc_c89+set}" = set; then :
+if test "${ac_cv_prog_cc_c89+set}" = set; then
$as_echo_n "(cached) " >&6
else
ac_cv_prog_cc_c89=no
ac_save_CC=$CC
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#include <stdarg.h>
#include <stdio.h>
@@ -3935,9 +3879,32 @@
-Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__"
do
CC="$ac_save_CC $ac_arg"
- if ac_fn_c_try_compile "$LINENO"; then :
+ rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
ac_cv_prog_cc_c89=$ac_arg
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
fi
+
rm -f core conftest.err conftest.$ac_objext
test "x$ac_cv_prog_cc_c89" != "xno" && break
done
@@ -3948,19 +3915,17 @@
# AC_CACHE_VAL
case "x$ac_cv_prog_cc_c89" in
x)
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5
+ { $as_echo "$as_me:$LINENO: result: none needed" >&5
$as_echo "none needed" >&6; } ;;
xno)
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5
+ { $as_echo "$as_me:$LINENO: result: unsupported" >&5
$as_echo "unsupported" >&6; } ;;
*)
CC="$CC $ac_cv_prog_cc_c89"
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5
+ { $as_echo "$as_me:$LINENO: result: $ac_cv_prog_cc_c89" >&5
$as_echo "$ac_cv_prog_cc_c89" >&6; } ;;
esac
-if test "x$ac_cv_prog_cc_c89" != xno; then :
-fi
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
@@ -3974,9 +3939,9 @@
ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_fc_compiler_gnu
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for dummy main to link with Fortran libraries" >&5
+{ $as_echo "$as_me:$LINENO: checking for dummy main to link with Fortran libraries" >&5
$as_echo_n "checking for dummy main to link with Fortran libraries... " >&6; }
-if test "${ac_cv_fc_dummy_main+set}" = set; then :
+if test "${ac_cv_fc_dummy_main+set}" = set; then
$as_echo_n "(cached) " >&6
else
ac_fc_dm_save_LIBS=$LIBS
@@ -3989,7 +3954,11 @@
ac_compiler_gnu=$ac_cv_c_compiler_gnu
# First, try linking without a dummy main:
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#ifdef FC_DUMMY_MAIN
@@ -4008,17 +3977,46 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext && {
+ test "$cross_compiling" = yes ||
+ $as_test_x conftest$ac_exeext
+ }; then
ac_cv_fortran_dummy_main=none
else
- ac_cv_fortran_dummy_main=unknown
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_fortran_dummy_main=unknown
fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
+
if test $ac_cv_fortran_dummy_main = unknown; then
for ac_func in MAIN__ MAIN_ __main MAIN _MAIN __MAIN main_ main__ _main; do
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#define $ac_fortran_dm_var $ac_func
#ifdef FC_DUMMY_MAIN
@@ -4037,11 +4035,38 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext && {
+ test "$cross_compiling" = yes ||
+ $as_test_x conftest$ac_exeext
+ }; then
ac_cv_fortran_dummy_main=$ac_func; break
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
done
fi
ac_ext=${ac_fc_srcext-f}
@@ -4053,10 +4078,10 @@
LIBS=$ac_fc_dm_save_LIBS
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_fc_dummy_main" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_fc_dummy_main" >&5
$as_echo "$ac_cv_fc_dummy_main" >&6; }
FC_DUMMY_MAIN=$ac_cv_fc_dummy_main
-if test "$FC_DUMMY_MAIN" != unknown; then :
+if test "$FC_DUMMY_MAIN" != unknown; then
if test $FC_DUMMY_MAIN != none; then
cat >>confdefs.h <<_ACEOF
@@ -4065,17 +4090,23 @@
if test "x$ac_cv_fc_dummy_main" = "x$ac_cv_f77_dummy_main"; then
-$as_echo "#define FC_DUMMY_MAIN_EQ_F77 1" >>confdefs.h
+cat >>confdefs.h <<\_ACEOF
+#define FC_DUMMY_MAIN_EQ_F77 1
+_ACEOF
fi
fi
else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+ { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "linking to Fortran libraries from C fails
-See \`config.log' for more details" "$LINENO" 5; }
+{ { $as_echo "$as_me:$LINENO: error: linking to Fortran libraries from C fails
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: linking to Fortran libraries from C fails
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }; }
fi
+
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
@@ -4086,12 +4117,12 @@
ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_fc_compiler_gnu
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for Fortran name-mangling scheme" >&5
+{ $as_echo "$as_me:$LINENO: checking for Fortran name-mangling scheme" >&5
$as_echo_n "checking for Fortran name-mangling scheme... " >&6; }
-if test "${ac_cv_fc_mangling+set}" = set; then :
+if test "${ac_cv_fc_mangling+set}" = set; then
$as_echo_n "(cached) " >&6
else
- cat > conftest.$ac_ext <<_ACEOF
+ cat >conftest.$ac_ext <<_ACEOF
subroutine foobar()
return
end
@@ -4099,7 +4130,24 @@
return
end
_ACEOF
-if ac_fn_fc_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_fc_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
mv conftest.$ac_objext cfortran_test.$ac_objext
ac_save_LIBS=$LIBS
@@ -4114,7 +4162,11 @@
for ac_foobar in foobar FOOBAR; do
for ac_underscore in "" "_"; do
ac_func="$ac_foobar$ac_underscore"
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
@@ -4140,11 +4192,38 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext && {
+ test "$cross_compiling" = yes ||
+ $as_test_x conftest$ac_exeext
+ }; then
ac_success=yes; break 2
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
done
done
ac_ext=${ac_fc_srcext-f}
@@ -4172,7 +4251,11 @@
ac_success_extra=no
for ac_extra in "" "_"; do
ac_func="$ac_foo_bar$ac_underscore$ac_extra"
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
@@ -4198,11 +4281,38 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext && {
+ test "$cross_compiling" = yes ||
+ $as_test_x conftest$ac_exeext
+ }; then
ac_success_extra=yes; break
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
done
ac_ext=${ac_fc_srcext-f}
ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
@@ -4211,16 +4321,16 @@
if test "$ac_success_extra" = "yes"; then
ac_cv_fc_mangling="$ac_case case"
- if test -z "$ac_underscore"; then
- ac_cv_fc_mangling="$ac_cv_fc_mangling, no underscore"
+ if test -z "$ac_underscore"; then
+ ac_cv_fc_mangling="$ac_cv_fc_mangling, no underscore"
else
- ac_cv_fc_mangling="$ac_cv_fc_mangling, underscore"
- fi
- if test -z "$ac_extra"; then
- ac_cv_fc_mangling="$ac_cv_fc_mangling, no extra underscore"
+ ac_cv_fc_mangling="$ac_cv_fc_mangling, underscore"
+ fi
+ if test -z "$ac_extra"; then
+ ac_cv_fc_mangling="$ac_cv_fc_mangling, no extra underscore"
else
- ac_cv_fc_mangling="$ac_cv_fc_mangling, extra underscore"
- fi
+ ac_cv_fc_mangling="$ac_cv_fc_mangling, extra underscore"
+ fi
else
ac_cv_fc_mangling="unknown"
fi
@@ -4232,15 +4342,22 @@
rm -rf conftest*
rm -f cfortran_test*
else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "cannot compile a simple Fortran program
-See \`config.log' for more details" "$LINENO" 5; }
+{ { $as_echo "$as_me:$LINENO: error: cannot compile a simple Fortran program
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: cannot compile a simple Fortran program
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }; }
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_fc_mangling" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_fc_mangling" >&5
$as_echo "$ac_cv_fc_mangling" >&6; }
ac_ext=c
@@ -4253,51 +4370,85 @@
ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_fc_compiler_gnu
+
+
case $ac_cv_fc_mangling in
"lower case, no underscore, no extra underscore")
- $as_echo "#define FC_FUNC(name,NAME) name" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC(name,NAME) name
+_ACEOF
- $as_echo "#define FC_FUNC_(name,NAME) name" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC_(name,NAME) name
+_ACEOF
;;
"lower case, no underscore, extra underscore")
- $as_echo "#define FC_FUNC(name,NAME) name" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC(name,NAME) name
+_ACEOF
- $as_echo "#define FC_FUNC_(name,NAME) name ## _" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC_(name,NAME) name ## _
+_ACEOF
;;
"lower case, underscore, no extra underscore")
- $as_echo "#define FC_FUNC(name,NAME) name ## _" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC(name,NAME) name ## _
+_ACEOF
- $as_echo "#define FC_FUNC_(name,NAME) name ## _" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC_(name,NAME) name ## _
+_ACEOF
;;
"lower case, underscore, extra underscore")
- $as_echo "#define FC_FUNC(name,NAME) name ## _" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC(name,NAME) name ## _
+_ACEOF
- $as_echo "#define FC_FUNC_(name,NAME) name ## __" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC_(name,NAME) name ## __
+_ACEOF
;;
"upper case, no underscore, no extra underscore")
- $as_echo "#define FC_FUNC(name,NAME) NAME" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC(name,NAME) NAME
+_ACEOF
- $as_echo "#define FC_FUNC_(name,NAME) NAME" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC_(name,NAME) NAME
+_ACEOF
;;
"upper case, no underscore, extra underscore")
- $as_echo "#define FC_FUNC(name,NAME) NAME" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC(name,NAME) NAME
+_ACEOF
- $as_echo "#define FC_FUNC_(name,NAME) NAME ## _" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC_(name,NAME) NAME ## _
+_ACEOF
;;
"upper case, underscore, no extra underscore")
- $as_echo "#define FC_FUNC(name,NAME) NAME ## _" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC(name,NAME) NAME ## _
+_ACEOF
- $as_echo "#define FC_FUNC_(name,NAME) NAME ## _" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC_(name,NAME) NAME ## _
+_ACEOF
;;
"upper case, underscore, extra underscore")
- $as_echo "#define FC_FUNC(name,NAME) NAME ## _" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC(name,NAME) NAME ## _
+_ACEOF
- $as_echo "#define FC_FUNC_(name,NAME) NAME ## __" >>confdefs.h
+ cat >>confdefs.h <<\_ACEOF
+#define FC_FUNC_(name,NAME) NAME ## __
+_ACEOF
;;
*)
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unknown Fortran name-mangling scheme" >&5
+ { $as_echo "$as_me:$LINENO: WARNING: unknown Fortran name-mangling scheme" >&5
$as_echo "$as_me: WARNING: unknown Fortran name-mangling scheme" >&2;}
- ;;
+ ;;
esac
ac_ext=c
@@ -4315,9 +4466,9 @@
ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_fc_compiler_gnu
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for Fortran flag to compile .f90 files" >&5
+{ $as_echo "$as_me:$LINENO: checking for Fortran flag to compile .f90 files" >&5
$as_echo_n "checking for Fortran flag to compile .f90 files... " >&6; }
-if test "${ac_cv_fc_srcext_f90+set}" = set; then :
+if test "${ac_cv_fc_srcext_f90+set}" = set; then
$as_echo_n "(cached) " >&6
else
ac_ext=f90
@@ -4326,24 +4477,49 @@
ac_cv_fc_srcext_f90=unknown
for ac_flag in none -qsuffix=f=f90 -Tf; do
test "x$ac_flag" != xnone && ac_fcflags_srcext="$ac_flag"
- cat > conftest.$ac_ext <<_ACEOF
+ cat >conftest.$ac_ext <<_ACEOF
program main
end
_ACEOF
-if ac_fn_fc_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_fc_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
ac_cv_fc_srcext_f90=$ac_flag; break
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
done
rm -f conftest.$ac_objext conftest.f90
ac_fcflags_srcext=$ac_fcflags_srcext_save
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_fc_srcext_f90" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_fc_srcext_f90" >&5
$as_echo "$ac_cv_fc_srcext_f90" >&6; }
if test "x$ac_cv_fc_srcext_f90" = xunknown; then
- as_fn_error $? "Fortran could not compile .f90 files" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: Fortran could not compile .f90 files" >&5
+$as_echo "$as_me: error: Fortran could not compile .f90 files" >&2;}
+ { (exit 1); exit 1; }; }
else
ac_fc_srcext=f90
if test "x$ac_cv_fc_srcext_f90" = xnone; then
@@ -4365,38 +4541,63 @@
ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_fc_compiler_gnu
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for Fortran flag needed to accept free-form source" >&5
-$as_echo_n "checking for Fortran flag needed to accept free-form source... " >&6; }
-if test "${ac_cv_fc_freeform+set}" = set; then :
+{ $as_echo "$as_me:$LINENO: checking for Fortran flag needed to allow free-form source" >&5
+$as_echo_n "checking for Fortran flag needed to allow free-form source... " >&6; }
+if test "${ac_cv_fc_freeform+set}" = set; then
$as_echo_n "(cached) " >&6
else
ac_cv_fc_freeform=unknown
ac_fc_freeform_FCFLAGS_save=$FCFLAGS
for ac_flag in none -ffree-form -FR -free -qfree -Mfree -Mfreeform \
- -freeform "-f free" +source=free -nfix
+ -freeform "-f free"
do
test "x$ac_flag" != xnone && FCFLAGS="$ac_fc_freeform_FCFLAGS_save $ac_flag"
- cat > conftest.$ac_ext <<_ACEOF
+ cat >conftest.$ac_ext <<_ACEOF
program freeform
! FIXME: how to best confuse non-freeform compilers?
print *, 'Hello ', &
- 'world.'
+ 'world.'
end
_ACEOF
-if ac_fn_fc_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_fc_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
ac_cv_fc_freeform=$ac_flag; break
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
done
rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
FCFLAGS=$ac_fc_freeform_FCFLAGS_save
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_fc_freeform" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_fc_freeform" >&5
$as_echo "$ac_cv_fc_freeform" >&6; }
if test "x$ac_cv_fc_freeform" = xunknown; then
- as_fn_error 77 "Fortran does not accept free-form source" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: Fortran does not accept free-form source" >&5
+$as_echo "$as_me: error: Fortran does not accept free-form source" >&2;}
+ { (exit 77); exit 77; }; }
else
if test "x$ac_cv_fc_freeform" != xnone; then
FCFLAGS="$FCFLAGS $ac_cv_fc_freeform"
@@ -4417,9 +4618,9 @@
if test -n "$ac_tool_prefix"; then
# Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args.
set dummy ${ac_tool_prefix}gcc; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CC+set}" = set; then :
+if test "${ac_cv_prog_CC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$CC"; then
@@ -4430,24 +4631,24 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_CC="${ac_tool_prefix}gcc"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
fi
fi
CC=$ac_cv_prog_CC
if test -n "$CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+ { $as_echo "$as_me:$LINENO: result: $CC" >&5
$as_echo "$CC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -4457,9 +4658,9 @@
ac_ct_CC=$CC
# Extract the first word of "gcc", so it can be a program name with args.
set dummy gcc; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_CC+set}" = set; then :
+if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_CC"; then
@@ -4470,24 +4671,24 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_ac_ct_CC="gcc"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
fi
fi
ac_ct_CC=$ac_cv_prog_ac_ct_CC
if test -n "$ac_ct_CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+ { $as_echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
$as_echo "$ac_ct_CC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -4496,7 +4697,7 @@
else
case $cross_compiling:$ac_tool_warned in
yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+{ $as_echo "$as_me:$LINENO: WARNING: using cross tools not prefixed with host triplet" >&5
$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
ac_tool_warned=yes ;;
esac
@@ -4510,9 +4711,9 @@
if test -n "$ac_tool_prefix"; then
# Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args.
set dummy ${ac_tool_prefix}cc; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CC+set}" = set; then :
+if test "${ac_cv_prog_CC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$CC"; then
@@ -4523,24 +4724,24 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_CC="${ac_tool_prefix}cc"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
fi
fi
CC=$ac_cv_prog_CC
if test -n "$CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+ { $as_echo "$as_me:$LINENO: result: $CC" >&5
$as_echo "$CC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -4550,9 +4751,9 @@
if test -z "$CC"; then
# Extract the first word of "cc", so it can be a program name with args.
set dummy cc; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CC+set}" = set; then :
+if test "${ac_cv_prog_CC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$CC"; then
@@ -4564,18 +4765,18 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
ac_prog_rejected=yes
continue
fi
ac_cv_prog_CC="cc"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
if test $ac_prog_rejected = yes; then
@@ -4594,10 +4795,10 @@
fi
CC=$ac_cv_prog_CC
if test -n "$CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+ { $as_echo "$as_me:$LINENO: result: $CC" >&5
$as_echo "$CC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -4609,9 +4810,9 @@
do
# Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
set dummy $ac_tool_prefix$ac_prog; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CC+set}" = set; then :
+if test "${ac_cv_prog_CC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$CC"; then
@@ -4622,24 +4823,24 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
fi
fi
CC=$ac_cv_prog_CC
if test -n "$CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+ { $as_echo "$as_me:$LINENO: result: $CC" >&5
$as_echo "$CC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -4653,9 +4854,9 @@
do
# Extract the first word of "$ac_prog", so it can be a program name with args.
set dummy $ac_prog; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_CC+set}" = set; then :
+if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_CC"; then
@@ -4666,24 +4867,24 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_ac_ct_CC="$ac_prog"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
fi
fi
ac_ct_CC=$ac_cv_prog_ac_ct_CC
if test -n "$ac_ct_CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+ { $as_echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
$as_echo "$ac_ct_CC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -4696,7 +4897,7 @@
else
case $cross_compiling:$ac_tool_warned in
yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+{ $as_echo "$as_me:$LINENO: WARNING: using cross tools not prefixed with host triplet" >&5
$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
ac_tool_warned=yes ;;
esac
@@ -4707,42 +4908,62 @@
fi
-test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+test -z "$CC" && { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "no acceptable C compiler found in \$PATH
-See \`config.log' for more details" "$LINENO" 5; }
+{ { $as_echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: no acceptable C compiler found in \$PATH
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }; }
# Provide some information about the compiler.
-$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5
+$as_echo "$as_me:$LINENO: checking for C compiler version" >&5
set X $ac_compile
ac_compiler=$2
-for ac_option in --version -v -V -qversion; do
- { { ac_try="$ac_compiler $ac_option >&5"
+{ (ac_try="$ac_compiler --version >&5"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_compiler $ac_option >&5") 2>conftest.err
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compiler --version >&5") 2>&5
ac_status=$?
- if test -s conftest.err; then
- sed '10a\
-... rest of stderr output deleted ...
- 10q' conftest.err >conftest.er1
- cat conftest.er1 >&5
- fi
- rm -f conftest.er1 conftest.err
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }
-done
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+{ (ac_try="$ac_compiler -v >&5"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compiler -v >&5") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+{ (ac_try="$ac_compiler -V >&5"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compiler -V >&5") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5
+{ $as_echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5
$as_echo_n "checking whether we are using the GNU C compiler... " >&6; }
-if test "${ac_cv_c_compiler_gnu+set}" = set; then :
+if test "${ac_cv_c_compiler_gnu+set}" = set; then
$as_echo_n "(cached) " >&6
else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#ifdef FC_DUMMY_MAIN
@@ -4764,16 +4985,37 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
ac_compiler_gnu=yes
else
- ac_compiler_gnu=no
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_compiler_gnu=no
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
ac_cv_c_compiler_gnu=$ac_compiler_gnu
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5
$as_echo "$ac_cv_c_compiler_gnu" >&6; }
if test $ac_compiler_gnu = yes; then
GCC=yes
@@ -4782,16 +5024,20 @@
fi
ac_test_CFLAGS=${CFLAGS+set}
ac_save_CFLAGS=$CFLAGS
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5
+{ $as_echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5
$as_echo_n "checking whether $CC accepts -g... " >&6; }
-if test "${ac_cv_prog_cc_g+set}" = set; then :
+if test "${ac_cv_prog_cc_g+set}" = set; then
$as_echo_n "(cached) " >&6
else
ac_save_c_werror_flag=$ac_c_werror_flag
ac_c_werror_flag=yes
ac_cv_prog_cc_g=no
CFLAGS="-g"
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#ifdef FC_DUMMY_MAIN
@@ -4810,11 +5056,35 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
ac_cv_prog_cc_g=yes
else
- CFLAGS=""
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ CFLAGS=""
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#ifdef FC_DUMMY_MAIN
@@ -4833,12 +5103,36 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ :
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
-else
- ac_c_werror_flag=$ac_save_c_werror_flag
+ ac_c_werror_flag=$ac_save_c_werror_flag
CFLAGS="-g"
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#ifdef FC_DUMMY_MAIN
@@ -4857,17 +5151,42 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
ac_cv_prog_cc_g=yes
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
ac_c_werror_flag=$ac_save_c_werror_flag
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5
$as_echo "$ac_cv_prog_cc_g" >&6; }
if test "$ac_test_CFLAGS" = set; then
CFLAGS=$ac_save_CFLAGS
@@ -4884,14 +5203,18 @@
CFLAGS=
fi
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5
+{ $as_echo "$as_me:$LINENO: checking for $CC option to accept ISO C89" >&5
$as_echo_n "checking for $CC option to accept ISO C89... " >&6; }
-if test "${ac_cv_prog_cc_c89+set}" = set; then :
+if test "${ac_cv_prog_cc_c89+set}" = set; then
$as_echo_n "(cached) " >&6
else
ac_cv_prog_cc_c89=no
ac_save_CC=$CC
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#include <stdarg.h>
#include <stdio.h>
@@ -4956,9 +5279,32 @@
-Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__"
do
CC="$ac_save_CC $ac_arg"
- if ac_fn_c_try_compile "$LINENO"; then :
+ rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
ac_cv_prog_cc_c89=$ac_arg
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
fi
+
rm -f core conftest.err conftest.$ac_objext
test "x$ac_cv_prog_cc_c89" != "xno" && break
done
@@ -4969,19 +5315,17 @@
# AC_CACHE_VAL
case "x$ac_cv_prog_cc_c89" in
x)
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5
+ { $as_echo "$as_me:$LINENO: result: none needed" >&5
$as_echo "none needed" >&6; } ;;
xno)
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5
+ { $as_echo "$as_me:$LINENO: result: unsupported" >&5
$as_echo "unsupported" >&6; } ;;
*)
CC="$CC $ac_cv_prog_cc_c89"
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5
+ { $as_echo "$as_me:$LINENO: result: $ac_cv_prog_cc_c89" >&5
$as_echo "$ac_cv_prog_cc_c89" >&6; } ;;
esac
-if test "x$ac_cv_prog_cc_c89" != xno; then :
-fi
ac_ext=${ac_fc_srcext-f}
ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
@@ -5000,14 +5344,14 @@
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5
+{ $as_echo "$as_me:$LINENO: checking how to run the C preprocessor" >&5
$as_echo_n "checking how to run the C preprocessor... " >&6; }
# On Suns, sometimes $CPP names a directory.
if test -n "$CPP" && test -d "$CPP"; then
CPP=
fi
if test -z "$CPP"; then
- if test "${ac_cv_prog_CPP+set}" = set; then :
+ if test "${ac_cv_prog_CPP+set}" = set; then
$as_echo_n "(cached) " >&6
else
# Double quotes because CPP needs to be expanded
@@ -5022,7 +5366,11 @@
# <limits.h> exists even on freestanding compilers.
# On the NeXT, cc -E runs the code through the compiler's parser,
# not just through cpp. "Syntax error" is here to catch this case.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#ifdef __STDC__
# include <limits.h>
@@ -5031,34 +5379,78 @@
#endif
Syntax error
_ACEOF
-if ac_fn_c_try_cpp "$LINENO"; then :
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null && {
+ test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ }; then
+ :
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
-else
# Broken: fails on valid input.
continue
fi
+
rm -f conftest.err conftest.$ac_ext
# OK, works on sane cases. Now check whether nonexistent headers
# can be detected and how.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#include <ac_nonexistent.h>
_ACEOF
-if ac_fn_c_try_cpp "$LINENO"; then :
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null && {
+ test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ }; then
# Broken: success on invalid input.
continue
else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
# Passes both tests.
ac_preproc_ok=:
break
fi
+
rm -f conftest.err conftest.$ac_ext
done
# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
rm -f conftest.err conftest.$ac_ext
-if $ac_preproc_ok; then :
+if $ac_preproc_ok; then
break
fi
@@ -5070,7 +5462,7 @@
else
ac_cv_prog_CPP=$CPP
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5
+{ $as_echo "$as_me:$LINENO: result: $CPP" >&5
$as_echo "$CPP" >&6; }
ac_preproc_ok=false
for ac_c_preproc_warn_flag in '' yes
@@ -5081,7 +5473,11 @@
# <limits.h> exists even on freestanding compilers.
# On the NeXT, cc -E runs the code through the compiler's parser,
# not just through cpp. "Syntax error" is here to catch this case.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#ifdef __STDC__
# include <limits.h>
@@ -5090,40 +5486,87 @@
#endif
Syntax error
_ACEOF
-if ac_fn_c_try_cpp "$LINENO"; then :
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null && {
+ test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ }; then
+ :
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
-else
# Broken: fails on valid input.
continue
fi
+
rm -f conftest.err conftest.$ac_ext
# OK, works on sane cases. Now check whether nonexistent headers
# can be detected and how.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#include <ac_nonexistent.h>
_ACEOF
-if ac_fn_c_try_cpp "$LINENO"; then :
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null && {
+ test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ }; then
# Broken: success on invalid input.
continue
else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
# Passes both tests.
ac_preproc_ok=:
break
fi
+
rm -f conftest.err conftest.$ac_ext
done
# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
rm -f conftest.err conftest.$ac_ext
-if $ac_preproc_ok; then :
-
+if $ac_preproc_ok; then
+ :
else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+ { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "C preprocessor \"$CPP\" fails sanity check
-See \`config.log' for more details" "$LINENO" 5; }
+{ { $as_echo "$as_me:$LINENO: error: C preprocessor \"$CPP\" fails sanity check
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: C preprocessor \"$CPP\" fails sanity check
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }; }
fi
ac_ext=c
@@ -5133,9 +5576,9 @@
ac_compiler_gnu=$ac_cv_c_compiler_gnu
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5
+{ $as_echo "$as_me:$LINENO: checking for grep that handles long lines and -e" >&5
$as_echo_n "checking for grep that handles long lines and -e... " >&6; }
-if test "${ac_cv_path_GREP+set}" = set; then :
+if test "${ac_cv_path_GREP+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -z "$GREP"; then
@@ -5146,7 +5589,7 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_prog in grep ggrep; do
+ for ac_prog in grep ggrep; do
for ac_exec_ext in '' $ac_executable_extensions; do
ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext"
{ test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue
@@ -5166,7 +5609,7 @@
$as_echo 'GREP' >> "conftest.nl"
"$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break
diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
- as_fn_arith $ac_count + 1 && ac_count=$as_val
+ ac_count=`expr $ac_count + 1`
if test $ac_count -gt ${ac_path_GREP_max-0}; then
# Best one so far, save it but keep looking for a better one
ac_cv_path_GREP="$ac_path_GREP"
@@ -5181,24 +5624,26 @@
$ac_path_GREP_found && break 3
done
done
- done
+done
IFS=$as_save_IFS
if test -z "$ac_cv_path_GREP"; then
- as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5
+$as_echo "$as_me: error: no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;}
+ { (exit 1); exit 1; }; }
fi
else
ac_cv_path_GREP=$GREP
fi
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_path_GREP" >&5
$as_echo "$ac_cv_path_GREP" >&6; }
GREP="$ac_cv_path_GREP"
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5
+{ $as_echo "$as_me:$LINENO: checking for egrep" >&5
$as_echo_n "checking for egrep... " >&6; }
-if test "${ac_cv_path_EGREP+set}" = set; then :
+if test "${ac_cv_path_EGREP+set}" = set; then
$as_echo_n "(cached) " >&6
else
if echo a | $GREP -E '(a|b)' >/dev/null 2>&1
@@ -5212,7 +5657,7 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_prog in egrep; do
+ for ac_prog in egrep; do
for ac_exec_ext in '' $ac_executable_extensions; do
ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext"
{ test -f "$ac_path_EGREP" && $as_test_x "$ac_path_EGREP"; } || continue
@@ -5232,7 +5677,7 @@
$as_echo 'EGREP' >> "conftest.nl"
"$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break
diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
- as_fn_arith $ac_count + 1 && ac_count=$as_val
+ ac_count=`expr $ac_count + 1`
if test $ac_count -gt ${ac_path_EGREP_max-0}; then
# Best one so far, save it but keep looking for a better one
ac_cv_path_EGREP="$ac_path_EGREP"
@@ -5247,10 +5692,12 @@
$ac_path_EGREP_found && break 3
done
done
- done
+done
IFS=$as_save_IFS
if test -z "$ac_cv_path_EGREP"; then
- as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5
+$as_echo "$as_me: error: no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;}
+ { (exit 1); exit 1; }; }
fi
else
ac_cv_path_EGREP=$EGREP
@@ -5258,17 +5705,21 @@
fi
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_path_EGREP" >&5
$as_echo "$ac_cv_path_EGREP" >&6; }
EGREP="$ac_cv_path_EGREP"
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5
+{ $as_echo "$as_me:$LINENO: checking for ANSI C header files" >&5
$as_echo_n "checking for ANSI C header files... " >&6; }
-if test "${ac_cv_header_stdc+set}" = set; then :
+if test "${ac_cv_header_stdc+set}" = set; then
$as_echo_n "(cached) " >&6
else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#include <stdlib.h>
#include <stdarg.h>
@@ -5291,23 +5742,48 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
ac_cv_header_stdc=yes
else
- ac_cv_header_stdc=no
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_header_stdc=no
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
if test $ac_cv_header_stdc = yes; then
# SunOS 4.x string.h does not declare mem*, contrary to ANSI.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#include <string.h>
_ACEOF
if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
- $EGREP "memchr" >/dev/null 2>&1; then :
-
+ $EGREP "memchr" >/dev/null 2>&1; then
+ :
else
ac_cv_header_stdc=no
fi
@@ -5317,14 +5793,18 @@
if test $ac_cv_header_stdc = yes; then
# ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#include <stdlib.h>
_ACEOF
if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
- $EGREP "free" >/dev/null 2>&1; then :
-
+ $EGREP "free" >/dev/null 2>&1; then
+ :
else
ac_cv_header_stdc=no
fi
@@ -5334,10 +5814,14 @@
if test $ac_cv_header_stdc = yes; then
# /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
- if test "$cross_compiling" = yes; then :
+ if test "$cross_compiling" = yes; then
:
else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#include <ctype.h>
#include <stdlib.h>
@@ -5364,33 +5848,118 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_run "$LINENO"; then :
+rm -f conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_link") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ :
+else
+ $as_echo "$as_me: program exited with status $ac_status" >&5
+$as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
-else
- ac_cv_header_stdc=no
+( exit $ac_status )
+ac_cv_header_stdc=no
fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
+rm -rf conftest.dSYM
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
fi
+
fi
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5
$as_echo "$ac_cv_header_stdc" >&6; }
if test $ac_cv_header_stdc = yes; then
-$as_echo "#define STDC_HEADERS 1" >>confdefs.h
+cat >>confdefs.h <<\_ACEOF
+#define STDC_HEADERS 1
+_ACEOF
fi
# On IRIX 5.3, sys/types and inttypes.h are conflicting.
+
+
+
+
+
+
+
+
+
for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \
inttypes.h stdint.h unistd.h
-do :
- as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
-ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default
-"
-if eval test \"x\$"$as_ac_Header"\" = x"yes"; then :
+do
+as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
+{ $as_echo "$as_me:$LINENO: checking for $ac_header" >&5
+$as_echo_n "checking for $ac_header... " >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ $as_echo_n "(cached) " >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ eval "$as_ac_Header=yes"
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_Header=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+ac_res=`eval 'as_val=${'$as_ac_Header'}
+ $as_echo "$as_val"'`
+ { $as_echo "$as_me:$LINENO: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+as_val=`eval 'as_val=${'$as_ac_Header'}
+ $as_echo "$as_val"'`
+ if test "x$as_val" = x""yes; then
cat >>confdefs.h <<_ACEOF
#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
_ACEOF
@@ -5400,22 +5969,288 @@
done
-ac_fn_c_check_header_mongrel "$LINENO" "emmintrin.h" "ac_cv_header_emmintrin_h" "$ac_includes_default"
-if test "x$ac_cv_header_emmintrin_h" = x""yes; then :
+if test "${ac_cv_header_emmintrin_h+set}" = set; then
+ { $as_echo "$as_me:$LINENO: checking for emmintrin.h" >&5
+$as_echo_n "checking for emmintrin.h... " >&6; }
+if test "${ac_cv_header_emmintrin_h+set}" = set; then
+ $as_echo_n "(cached) " >&6
+fi
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_header_emmintrin_h" >&5
+$as_echo "$ac_cv_header_emmintrin_h" >&6; }
+else
+ # Is the header compilable?
+{ $as_echo "$as_me:$LINENO: checking emmintrin.h usability" >&5
+$as_echo_n "checking emmintrin.h usability... " >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+#include <emmintrin.h>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ ac_header_compiler=yes
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
-$as_echo "#define HAVE_EMMINTRIN 1" >>confdefs.h
+ ac_header_compiler=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ $as_echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+$as_echo "$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ $as_echo "$as_me:$LINENO: checking emmintrin.h presence" >&5
+$as_echo_n "checking emmintrin.h presence... " >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <emmintrin.h>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null && {
+ test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ }; then
+ ac_header_preproc=yes
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_preproc=no
fi
+rm -f conftest.err conftest.$ac_ext
+{ $as_echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+$as_echo "$ac_header_preproc" >&6; }
-ac_fn_c_check_header_mongrel "$LINENO" "xmmintrin.h" "ac_cv_header_xmmintrin_h" "$ac_includes_default"
-if test "x$ac_cv_header_xmmintrin_h" = x""yes; then :
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+ yes:no: )
+ { $as_echo "$as_me:$LINENO: WARNING: emmintrin.h: accepted by the compiler, rejected by the preprocessor!" >&5
+$as_echo "$as_me: WARNING: emmintrin.h: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { $as_echo "$as_me:$LINENO: WARNING: emmintrin.h: proceeding with the compiler's result" >&5
+$as_echo "$as_me: WARNING: emmintrin.h: proceeding with the compiler's result" >&2;}
+ ac_header_preproc=yes
+ ;;
+ no:yes:* )
+ { $as_echo "$as_me:$LINENO: WARNING: emmintrin.h: present but cannot be compiled" >&5
+$as_echo "$as_me: WARNING: emmintrin.h: present but cannot be compiled" >&2;}
+ { $as_echo "$as_me:$LINENO: WARNING: emmintrin.h: check for missing prerequisite headers?" >&5
+$as_echo "$as_me: WARNING: emmintrin.h: check for missing prerequisite headers?" >&2;}
+ { $as_echo "$as_me:$LINENO: WARNING: emmintrin.h: see the Autoconf documentation" >&5
+$as_echo "$as_me: WARNING: emmintrin.h: see the Autoconf documentation" >&2;}
+ { $as_echo "$as_me:$LINENO: WARNING: emmintrin.h: section \"Present But Cannot Be Compiled\"" >&5
+$as_echo "$as_me: WARNING: emmintrin.h: section \"Present But Cannot Be Compiled\"" >&2;}
+ { $as_echo "$as_me:$LINENO: WARNING: emmintrin.h: proceeding with the preprocessor's result" >&5
+$as_echo "$as_me: WARNING: emmintrin.h: proceeding with the preprocessor's result" >&2;}
+ { $as_echo "$as_me:$LINENO: WARNING: emmintrin.h: in the future, the compiler will take precedence" >&5
+$as_echo "$as_me: WARNING: emmintrin.h: in the future, the compiler will take precedence" >&2;}
+ ( cat <<\_ASBOX
+## -------------------------------------- ##
+## Report this to jtromp AT princeton.edu ##
+## -------------------------------------- ##
+_ASBOX
+ ) | sed "s/^/$as_me: WARNING: /" >&2
+ ;;
+esac
+{ $as_echo "$as_me:$LINENO: checking for emmintrin.h" >&5
+$as_echo_n "checking for emmintrin.h... " >&6; }
+if test "${ac_cv_header_emmintrin_h+set}" = set; then
+ $as_echo_n "(cached) " >&6
+else
+ ac_cv_header_emmintrin_h=$ac_header_preproc
+fi
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_header_emmintrin_h" >&5
+$as_echo "$ac_cv_header_emmintrin_h" >&6; }
-$as_echo "#define HAVE_XMMINTRIN 1" >>confdefs.h
+fi
+if test "x$ac_cv_header_emmintrin_h" = x""yes; then
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_EMMINTRIN 1
+_ACEOF
+
fi
+if test "${ac_cv_header_xmmintrin_h+set}" = set; then
+ { $as_echo "$as_me:$LINENO: checking for xmmintrin.h" >&5
+$as_echo_n "checking for xmmintrin.h... " >&6; }
+if test "${ac_cv_header_xmmintrin_h+set}" = set; then
+ $as_echo_n "(cached) " >&6
+fi
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_header_xmmintrin_h" >&5
+$as_echo "$ac_cv_header_xmmintrin_h" >&6; }
+else
+ # Is the header compilable?
+{ $as_echo "$as_me:$LINENO: checking xmmintrin.h usability" >&5
+$as_echo_n "checking xmmintrin.h usability... " >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+#include <xmmintrin.h>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ ac_header_compiler=yes
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_compiler=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ $as_echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+$as_echo "$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ $as_echo "$as_me:$LINENO: checking xmmintrin.h presence" >&5
+$as_echo_n "checking xmmintrin.h presence... " >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <xmmintrin.h>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null && {
+ test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ }; then
+ ac_header_preproc=yes
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ $as_echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+$as_echo "$ac_header_preproc" >&6; }
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+ yes:no: )
+ { $as_echo "$as_me:$LINENO: WARNING: xmmintrin.h: accepted by the compiler, rejected by the preprocessor!" >&5
+$as_echo "$as_me: WARNING: xmmintrin.h: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { $as_echo "$as_me:$LINENO: WARNING: xmmintrin.h: proceeding with the compiler's result" >&5
+$as_echo "$as_me: WARNING: xmmintrin.h: proceeding with the compiler's result" >&2;}
+ ac_header_preproc=yes
+ ;;
+ no:yes:* )
+ { $as_echo "$as_me:$LINENO: WARNING: xmmintrin.h: present but cannot be compiled" >&5
+$as_echo "$as_me: WARNING: xmmintrin.h: present but cannot be compiled" >&2;}
+ { $as_echo "$as_me:$LINENO: WARNING: xmmintrin.h: check for missing prerequisite headers?" >&5
+$as_echo "$as_me: WARNING: xmmintrin.h: check for missing prerequisite headers?" >&2;}
+ { $as_echo "$as_me:$LINENO: WARNING: xmmintrin.h: see the Autoconf documentation" >&5
+$as_echo "$as_me: WARNING: xmmintrin.h: see the Autoconf documentation" >&2;}
+ { $as_echo "$as_me:$LINENO: WARNING: xmmintrin.h: section \"Present But Cannot Be Compiled\"" >&5
+$as_echo "$as_me: WARNING: xmmintrin.h: section \"Present But Cannot Be Compiled\"" >&2;}
+ { $as_echo "$as_me:$LINENO: WARNING: xmmintrin.h: proceeding with the preprocessor's result" >&5
+$as_echo "$as_me: WARNING: xmmintrin.h: proceeding with the preprocessor's result" >&2;}
+ { $as_echo "$as_me:$LINENO: WARNING: xmmintrin.h: in the future, the compiler will take precedence" >&5
+$as_echo "$as_me: WARNING: xmmintrin.h: in the future, the compiler will take precedence" >&2;}
+ ( cat <<\_ASBOX
+## -------------------------------------- ##
+## Report this to jtromp AT princeton.edu ##
+## -------------------------------------- ##
+_ASBOX
+ ) | sed "s/^/$as_me: WARNING: /" >&2
+ ;;
+esac
+{ $as_echo "$as_me:$LINENO: checking for xmmintrin.h" >&5
+$as_echo_n "checking for xmmintrin.h... " >&6; }
+if test "${ac_cv_header_xmmintrin_h+set}" = set; then
+ $as_echo_n "(cached) " >&6
+else
+ ac_cv_header_xmmintrin_h=$ac_header_preproc
+fi
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_header_xmmintrin_h" >&5
+$as_echo "$ac_cv_header_xmmintrin_h" >&6; }
+
+fi
+if test "x$ac_cv_header_xmmintrin_h" = x""yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_XMMINTRIN 1
+_ACEOF
+
+fi
+
+
ac_ext=${ac_fc_srcext-f}
ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
@@ -5431,7 +6266,7 @@
# Check whether --with-scotch-dir was given.
-if test "${with_scotch_dir+set}" = set; then :
+if test "${with_scotch_dir+set}" = set; then
withval=$with_scotch_dir;
ac_scotch_dir="$withval";
@@ -5440,7 +6275,7 @@
# Check whether --with-scotch-includedir was given.
-if test "${with_scotch_includedir+set}" = set; then :
+if test "${with_scotch_includedir+set}" = set; then
withval=$with_scotch_includedir;
ac_scotch_include_dir="$withval";
@@ -5449,7 +6284,7 @@
# Check whether --with-scotch-libdir was given.
-if test "${with_scotch_libdir+set}" = set; then :
+if test "${with_scotch_libdir+set}" = set; then
withval=$with_scotch_libdir;
ac_scotch_lib_dir="$withval";
@@ -5479,30 +6314,55 @@
if test -n "${ac_scotch_lib_dir}"; then
LDFLAGS="${LDFLAGS} -L${ac_scotch_lib_dir}";
fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for scotchfarchinit in -lscotch" >&5
+ { $as_echo "$as_me:$LINENO: checking for scotchfarchinit in -lscotch" >&5
$as_echo_n "checking for scotchfarchinit in -lscotch... " >&6; }
-if test "${ac_cv_lib_scotch_scotchfarchinit_+set}" = set; then :
+if test "${ac_cv_lib_scotch_scotchfarchinit_+set}" = set; then
$as_echo_n "(cached) " >&6
else
ac_check_lib_save_LIBS=$LIBS
LIBS="-lscotch -lscotcherr $LIBS"
-cat > conftest.$ac_ext <<_ACEOF
+cat >conftest.$ac_ext <<_ACEOF
program main
call scotchfarchinit
end
_ACEOF
-if ac_fn_fc_try_link "$LINENO"; then :
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_fc_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext && {
+ test "$cross_compiling" = yes ||
+ $as_test_x conftest$ac_exeext
+ }; then
ac_cv_lib_scotch_scotchfarchinit_=yes
else
- ac_cv_lib_scotch_scotchfarchinit_=no
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_scotch_scotchfarchinit_=no
fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_scotch_scotchfarchinit_" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_lib_scotch_scotchfarchinit_" >&5
$as_echo "$ac_cv_lib_scotch_scotchfarchinit_" >&6; }
-if test "x$ac_cv_lib_scotch_scotchfarchinit_" = x""yes; then :
+if test "x$ac_cv_lib_scotch_scotchfarchinit_" = x""yes; then
scotch_lib="yes";
else
scotch_lib="no";LDFLAGS=${ac_save_ldflags}
@@ -5520,31 +6380,35 @@
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether Scotch is usable" >&5
+{ $as_echo "$as_me:$LINENO: checking whether Scotch is usable" >&5
$as_echo_n "checking whether Scotch is usable... " >&6; }
if test "x${scotch_usable}" = "xyes"; then
-$as_echo "#define HAVE_SCOTCH 1" >>confdefs.h
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_SCOTCH 1
+_ACEOF
USE_BUNDLED_SCOTCH=0
SCOTCH_DIR="${ac_scotch_dir}"
SCOTCH_LIBDIR="${ac_scotch_lib_dir}"
SCOTCH_INCLUDEDIR="${ac_scotch_include_dir}"
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+ { $as_echo "$as_me:$LINENO: result: yes" >&5
$as_echo "yes" >&6; }
else
-$as_echo "#define HAVE_SCOTCH 1" >>confdefs.h
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_SCOTCH 1
+_ACEOF
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no, using bundled scotch instead" >&5
+ { $as_echo "$as_me:$LINENO: result: no, using bundled scotch instead" >&5
$as_echo "no, using bundled scotch instead" >&6; }
for ac_prog in flex lex
do
# Extract the first word of "$ac_prog", so it can be a program name with args.
set dummy $ac_prog; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_LEX+set}" = set; then :
+if test "${ac_cv_prog_LEX+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$LEX"; then
@@ -5555,24 +6419,24 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_LEX="$ac_prog"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
fi
fi
LEX=$ac_cv_prog_LEX
if test -n "$LEX"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LEX" >&5
+ { $as_echo "$as_me:$LINENO: result: $LEX" >&5
$as_echo "$LEX" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -5601,20 +6465,20 @@
return ! yylex () + ! yywrap ();
}
_ACEOF
-{ { ac_try="$LEX conftest.l"
+{ (ac_try="$LEX conftest.l"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
(eval "$LEX conftest.l") 2>&5
ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking lex output file root" >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+{ $as_echo "$as_me:$LINENO: checking lex output file root" >&5
$as_echo_n "checking lex output file root... " >&6; }
-if test "${ac_cv_prog_lex_root+set}" = set; then :
+if test "${ac_cv_prog_lex_root+set}" = set; then
$as_echo_n "(cached) " >&6
else
@@ -5623,17 +6487,19 @@
elif test -f lexyy.c; then
ac_cv_prog_lex_root=lexyy
else
- as_fn_error $? "cannot find output from $LEX; giving up" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: cannot find output from $LEX; giving up" >&5
+$as_echo "$as_me: error: cannot find output from $LEX; giving up" >&2;}
+ { (exit 1); exit 1; }; }
fi
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_lex_root" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_prog_lex_root" >&5
$as_echo "$ac_cv_prog_lex_root" >&6; }
LEX_OUTPUT_ROOT=$ac_cv_prog_lex_root
if test -z "${LEXLIB+set}"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking lex library" >&5
+ { $as_echo "$as_me:$LINENO: checking lex library" >&5
$as_echo_n "checking lex library... " >&6; }
-if test "${ac_cv_lib_lex+set}" = set; then :
+if test "${ac_cv_lib_lex+set}" = set; then
$as_echo_n "(cached) " >&6
else
@@ -5641,28 +6507,55 @@
ac_cv_lib_lex='none needed'
for ac_lib in '' -lfl -ll; do
LIBS="$ac_lib $ac_save_LIBS"
- cat > conftest.$ac_ext <<_ACEOF
+ cat >conftest.$ac_ext <<_ACEOF
`cat $LEX_OUTPUT_ROOT.c`
_ACEOF
-if ac_fn_fc_try_link "$LINENO"; then :
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_fc_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext && {
+ test "$cross_compiling" = yes ||
+ $as_test_x conftest$ac_exeext
+ }; then
ac_cv_lib_lex=$ac_lib
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
test "$ac_cv_lib_lex" != 'none needed' && break
done
LIBS=$ac_save_LIBS
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_lex" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_lib_lex" >&5
$as_echo "$ac_cv_lib_lex" >&6; }
test "$ac_cv_lib_lex" != 'none needed' && LEXLIB=$ac_cv_lib_lex
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether yytext is a pointer" >&5
+{ $as_echo "$as_me:$LINENO: checking whether yytext is a pointer" >&5
$as_echo_n "checking whether yytext is a pointer... " >&6; }
-if test "${ac_cv_prog_lex_yytext_pointer+set}" = set; then :
+if test "${ac_cv_prog_lex_yytext_pointer+set}" = set; then
$as_echo_n "(cached) " >&6
else
# POSIX says lex can declare yytext either as a pointer or an array; the
@@ -5671,38 +6564,69 @@
ac_cv_prog_lex_yytext_pointer=no
ac_save_LIBS=$LIBS
LIBS="$LEXLIB $ac_save_LIBS"
-cat > conftest.$ac_ext <<_ACEOF
+cat >conftest.$ac_ext <<_ACEOF
#define YYTEXT_POINTER 1
`cat $LEX_OUTPUT_ROOT.c`
_ACEOF
-if ac_fn_fc_try_link "$LINENO"; then :
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_fc_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext && {
+ test "$cross_compiling" = yes ||
+ $as_test_x conftest$ac_exeext
+ }; then
ac_cv_prog_lex_yytext_pointer=yes
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_save_LIBS
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_lex_yytext_pointer" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_prog_lex_yytext_pointer" >&5
$as_echo "$ac_cv_prog_lex_yytext_pointer" >&6; }
if test $ac_cv_prog_lex_yytext_pointer = yes; then
-$as_echo "#define YYTEXT_POINTER 1" >>confdefs.h
+cat >>confdefs.h <<\_ACEOF
+#define YYTEXT_POINTER 1
+_ACEOF
fi
rm -f conftest.l $LEX_OUTPUT_ROOT.c
fi
if test -z "$LEX" || test "X$LEX" = "Xno"; then
- as_fn_error $? "No suitable lex found" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: No suitable lex found" >&5
+$as_echo "$as_me: error: No suitable lex found" >&2;}
+ { (exit 1); exit 1; }; }
fi
for ac_prog in 'bison -y' byacc
do
# Extract the first word of "$ac_prog", so it can be a program name with args.
set dummy $ac_prog; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_YACC+set}" = set; then :
+if test "${ac_cv_prog_YACC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$YACC"; then
@@ -5713,24 +6637,24 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_YACC="$ac_prog"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
fi
fi
YACC=$ac_cv_prog_YACC
if test -n "$YACC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $YACC" >&5
+ { $as_echo "$as_me:$LINENO: result: $YACC" >&5
$as_echo "$YACC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -5740,7 +6664,9 @@
test -n "$YACC" || YACC="yacc"
if test -z "$YACC" || test "X$YACC" = "Xno"; then
- as_fn_error $? "No suitable yacc or bison found" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: No suitable yacc or bison found" >&5
+$as_echo "$as_me: error: No suitable yacc or bison found" >&2;}
+ { (exit 1); exit 1; }; }
fi
ac_aux_dir=
for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do
@@ -5759,7 +6685,9 @@
fi
done
if test -z "$ac_aux_dir"; then
- as_fn_error $? "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: cannot find install-sh or install.sh in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" >&5
+$as_echo "$as_me: error: cannot find install-sh or install.sh in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" >&2;}
+ { (exit 1); exit 1; }; }
fi
# These three variables are undocumented and unsupported,
@@ -5773,27 +6701,35 @@
# Make sure we can run config.sub.
$SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 ||
- as_fn_error $? "cannot run $SHELL $ac_aux_dir/config.sub" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: cannot run $SHELL $ac_aux_dir/config.sub" >&5
+$as_echo "$as_me: error: cannot run $SHELL $ac_aux_dir/config.sub" >&2;}
+ { (exit 1); exit 1; }; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking build system type" >&5
+{ $as_echo "$as_me:$LINENO: checking build system type" >&5
$as_echo_n "checking build system type... " >&6; }
-if test "${ac_cv_build+set}" = set; then :
+if test "${ac_cv_build+set}" = set; then
$as_echo_n "(cached) " >&6
else
ac_build_alias=$build_alias
test "x$ac_build_alias" = x &&
ac_build_alias=`$SHELL "$ac_aux_dir/config.guess"`
test "x$ac_build_alias" = x &&
- as_fn_error $? "cannot guess build type; you must specify one" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: cannot guess build type; you must specify one" >&5
+$as_echo "$as_me: error: cannot guess build type; you must specify one" >&2;}
+ { (exit 1); exit 1; }; }
ac_cv_build=`$SHELL "$ac_aux_dir/config.sub" $ac_build_alias` ||
- as_fn_error $? "$SHELL $ac_aux_dir/config.sub $ac_build_alias failed" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: $SHELL $ac_aux_dir/config.sub $ac_build_alias failed" >&5
+$as_echo "$as_me: error: $SHELL $ac_aux_dir/config.sub $ac_build_alias failed" >&2;}
+ { (exit 1); exit 1; }; }
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_build" >&5
$as_echo "$ac_cv_build" >&6; }
case $ac_cv_build in
*-*-*) ;;
-*) as_fn_error $? "invalid value of canonical build" "$LINENO" 5;;
+*) { { $as_echo "$as_me:$LINENO: error: invalid value of canonical build" >&5
+$as_echo "$as_me: error: invalid value of canonical build" >&2;}
+ { (exit 1); exit 1; }; };;
esac
build=$ac_cv_build
ac_save_IFS=$IFS; IFS='-'
@@ -5809,24 +6745,28 @@
case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking host system type" >&5
+{ $as_echo "$as_me:$LINENO: checking host system type" >&5
$as_echo_n "checking host system type... " >&6; }
-if test "${ac_cv_host+set}" = set; then :
+if test "${ac_cv_host+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test "x$host_alias" = x; then
ac_cv_host=$ac_cv_build
else
ac_cv_host=`$SHELL "$ac_aux_dir/config.sub" $host_alias` ||
- as_fn_error $? "$SHELL $ac_aux_dir/config.sub $host_alias failed" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: $SHELL $ac_aux_dir/config.sub $host_alias failed" >&5
+$as_echo "$as_me: error: $SHELL $ac_aux_dir/config.sub $host_alias failed" >&2;}
+ { (exit 1); exit 1; }; }
fi
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_host" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_host" >&5
$as_echo "$ac_cv_host" >&6; }
case $ac_cv_host in
*-*-*) ;;
-*) as_fn_error $? "invalid value of canonical host" "$LINENO" 5;;
+*) { { $as_echo "$as_me:$LINENO: error: invalid value of canonical host" >&5
+$as_echo "$as_me: error: invalid value of canonical host" >&2;}
+ { (exit 1); exit 1; }; };;
esac
host=$ac_cv_host
ac_save_IFS=$IFS; IFS='-'
@@ -5865,9 +6805,13 @@
CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
save_LIBS="$LIBS"
LIBS="$PTHREAD_LIBS $LIBS"
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS" >&5
+ { $as_echo "$as_me:$LINENO: checking for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS" >&5
$as_echo_n "checking for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS... " >&6; }
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
@@ -5893,12 +6837,39 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext && {
+ test "$cross_compiling" = yes ||
+ $as_test_x conftest$ac_exeext
+ }; then
acx_pthread_ok=yes
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $acx_pthread_ok" >&5
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
+ { $as_echo "$as_me:$LINENO: result: $acx_pthread_ok" >&5
$as_echo "$acx_pthread_ok" >&6; }
if test x"$acx_pthread_ok" = xno; then
PTHREAD_LIBS=""
@@ -5960,12 +6931,12 @@
case $flag in
none)
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether pthreads work without any flags" >&5
+ { $as_echo "$as_me:$LINENO: checking whether pthreads work without any flags" >&5
$as_echo_n "checking whether pthreads work without any flags... " >&6; }
;;
-*)
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether pthreads work with $flag" >&5
+ { $as_echo "$as_me:$LINENO: checking whether pthreads work with $flag" >&5
$as_echo_n "checking whether pthreads work with $flag... " >&6; }
PTHREAD_CFLAGS="$flag"
;;
@@ -5973,9 +6944,9 @@
pthread-config)
# Extract the first word of "pthread-config", so it can be a program name with args.
set dummy pthread-config; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_acx_pthread_config+set}" = set; then :
+if test "${ac_cv_prog_acx_pthread_config+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$acx_pthread_config"; then
@@ -5986,14 +6957,14 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_acx_pthread_config="yes"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
test -z "$ac_cv_prog_acx_pthread_config" && ac_cv_prog_acx_pthread_config="no"
@@ -6001,10 +6972,10 @@
fi
acx_pthread_config=$ac_cv_prog_acx_pthread_config
if test -n "$acx_pthread_config"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $acx_pthread_config" >&5
+ { $as_echo "$as_me:$LINENO: result: $acx_pthread_config" >&5
$as_echo "$acx_pthread_config" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -6015,7 +6986,7 @@
;;
*)
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for the pthreads library -l$flag" >&5
+ { $as_echo "$as_me:$LINENO: checking for the pthreads library -l$flag" >&5
$as_echo_n "checking for the pthreads library -l$flag... " >&6; }
PTHREAD_LIBS="-l$flag"
;;
@@ -6035,7 +7006,11 @@
# pthread_cleanup_push because it is one of the few pthread
# functions on Solaris that doesn't have a non-functional libc stub.
# We try pthread_create on general principles.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#include <pthread.h>
#ifdef FC_DUMMY_MAIN
@@ -6056,16 +7031,43 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext && {
+ test "$cross_compiling" = yes ||
+ $as_test_x conftest$ac_exeext
+ }; then
acx_pthread_ok=yes
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
+
LIBS="$save_LIBS"
CFLAGS="$save_CFLAGS"
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $acx_pthread_ok" >&5
+ { $as_echo "$as_me:$LINENO: result: $acx_pthread_ok" >&5
$as_echo "$acx_pthread_ok" >&6; }
if test "x$acx_pthread_ok" = xyes; then
break;
@@ -6084,11 +7086,15 @@
CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
# Detect AIX lossage: JOINABLE attribute is called UNDETACHED.
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for joinable pthread attribute" >&5
+ { $as_echo "$as_me:$LINENO: checking for joinable pthread attribute" >&5
$as_echo_n "checking for joinable pthread attribute... " >&6; }
attr_name=unknown
for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#include <pthread.h>
#ifdef FC_DUMMY_MAIN
@@ -6107,13 +7113,40 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext && {
+ test "$cross_compiling" = yes ||
+ $as_test_x conftest$ac_exeext
+ }; then
attr_name=$attr; break
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
done
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $attr_name" >&5
+ { $as_echo "$as_me:$LINENO: result: $attr_name" >&5
$as_echo "$attr_name" >&6; }
if test "$attr_name" != PTHREAD_CREATE_JOINABLE; then
@@ -6123,14 +7156,14 @@
fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking if more special flags are required for pthreads" >&5
+ { $as_echo "$as_me:$LINENO: checking if more special flags are required for pthreads" >&5
$as_echo_n "checking if more special flags are required for pthreads... " >&6; }
flag=no
case "${host_cpu}-${host_os}" in
*-aix* | *-freebsd* | *-darwin*) flag="-D_THREAD_SAFE";;
*solaris* | *-osf* | *-hpux*) flag="-D_REENTRANT";;
esac
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${flag}" >&5
+ { $as_echo "$as_me:$LINENO: result: ${flag}" >&5
$as_echo "${flag}" >&6; }
if test "x$flag" != xno; then
PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS"
@@ -6145,9 +7178,9 @@
do
# Extract the first word of "$ac_prog", so it can be a program name with args.
set dummy $ac_prog; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_PTHREAD_CC+set}" = set; then :
+if test "${ac_cv_prog_PTHREAD_CC+set}" = set; then
$as_echo_n "(cached) " >&6
else
if test -n "$PTHREAD_CC"; then
@@ -6158,24 +7191,24 @@
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_PTHREAD_CC="$ac_prog"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
- done
+done
IFS=$as_save_IFS
fi
fi
PTHREAD_CC=$ac_cv_prog_PTHREAD_CC
if test -n "$PTHREAD_CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $PTHREAD_CC" >&5
+ { $as_echo "$as_me:$LINENO: result: $PTHREAD_CC" >&5
$as_echo "$PTHREAD_CC" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
fi
@@ -6197,12 +7230,14 @@
# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
if test x"$acx_pthread_ok" = xyes; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: pthread found" >&5
+ { $as_echo "$as_me:$LINENO: result: pthread found" >&5
$as_echo "pthread found" >&6; }
:
else
acx_pthread_ok=no
- as_fn_error $? "pthread not found" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: pthread not found" >&5
+$as_echo "$as_me: error: pthread not found" >&2;}
+ { (exit 1); exit 1; }; }
fi
ac_ext=${ac_fc_srcext-f}
ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
@@ -6377,6 +7412,9 @@
+
+
+
if test x"$MPIFC" = x; then
MPIFC=mpif90
fi
@@ -6412,60 +7450,72 @@
pyspecfem3D="$builddir/pyspecfem3D"
cd $srcdir
- { $as_echo "$as_me:${as_lineno-$LINENO}: downloading missing Python dependencies" >&5
+ { $as_echo "$as_me:$LINENO: downloading missing Python dependencies" >&5
$as_echo "$as_me: downloading missing Python dependencies" >&6;}
if { ac_try='$pyspecfem3D setup.py install_deps -zmxd $builddir/deps >&5 2>&5'
- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
(eval $ac_try) 2>&5
ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; }; then :
-
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ :
else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+ { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "cannot download missing Python dependencies
-See \`config.log' for more details" "$LINENO" 5; }
+{ { $as_echo "$as_me:$LINENO: error: cannot download missing Python dependencies
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: cannot download missing Python dependencies
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }; }
fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: building Python dependencies" >&5
+
+ { $as_echo "$as_me:$LINENO: building Python dependencies" >&5
$as_echo "$as_me: building Python dependencies" >&6;}
if { ac_try='$pyspecfem3D setup.py develop -H None -f $builddir/deps -s $builddir -d $builddir/python >&5 2>&5'
- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
(eval $ac_try) 2>&5
ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; }; then :
-
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ :
else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+ { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "building Python dependencies
-See \`config.log' for more details" "$LINENO" 5; }
+{ { $as_echo "$as_me:$LINENO: error: building Python dependencies
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: building Python dependencies
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }; }
fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for egg-related flags" >&5
+
+ { $as_echo "$as_me:$LINENO: checking for egg-related flags" >&5
$as_echo_n "checking for egg-related flags... " >&6; }
if { ac_try='$pyspecfem3D setup.py egg_flags >&5 2>&5'
- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
(eval $ac_try) 2>&5
ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; }; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: ok" >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ { $as_echo "$as_me:$LINENO: result: ok" >&5
$as_echo "ok" >&6; }
. egg-flags.sh
rm -f egg-flags.sh
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: failed" >&5
+ { $as_echo "$as_me:$LINENO: result: failed" >&5
$as_echo "failed" >&6; }
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+ { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "cannot scan Python eggs for flags
-See \`config.log' for more details" "$LINENO" 5; }
+{ { $as_echo "$as_me:$LINENO: error: cannot scan Python eggs for flags
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: cannot scan Python eggs for flags
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }; }
fi
+
cd $builddir
rm -f pyspecfem3D
PYTHONPATH="$save_PYTHONPATH"
@@ -6502,9 +7552,9 @@
FC=$MPIFC
FCFLAGS="$FCFLAGS $FLAGS_NO_CHECK"
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether mpif.h works" >&5
+{ $as_echo "$as_me:$LINENO: checking whether mpif.h works" >&5
$as_echo_n "checking whether mpif.h works... " >&6; }
-cat > conftest.$ac_ext <<_ACEOF
+cat >conftest.$ac_ext <<_ACEOF
program main
@@ -6518,14 +7568,34 @@
end
_ACEOF
-if ac_fn_fc_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_fc_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+ { $as_echo "$as_me:$LINENO: result: yes" >&5
$as_echo "yes" >&6; }
else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
cit_mpif_h=unknown
cit_mpifc_info=`$FC -compile_info 2>/dev/null`
@@ -6535,13 +7605,16 @@
esac
done
if test "$cit_mpif_h" == "unknown"; then
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+ { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "cannot compile a trivial MPI program using $MPIFC
-See \`config.log' for more details" "$LINENO" 5; }
+{ { $as_echo "$as_me:$LINENO: error: cannot compile a trivial MPI program using $MPIFC
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: cannot compile a trivial MPI program using $MPIFC
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }; }
fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ofile" >&5
+ { $as_echo "$as_me:$LINENO: creating $ofile" >&5
$as_echo "$as_me: creating $ofile" >&6;}
cat >"$cfgfile" <<END_OF_HEADER
! $ofile. Generated from $cit_mpif_h by configure.
@@ -6551,9 +7624,9 @@
mv -f "$cfgfile" "$ofile" || \
(rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile")
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether generated mpif.h works" >&5
+ { $as_echo "$as_me:$LINENO: checking whether generated mpif.h works" >&5
$as_echo_n "checking whether generated mpif.h works... " >&6; }
- cat > conftest.$ac_ext <<_ACEOF
+ cat >conftest.$ac_ext <<_ACEOF
program main
@@ -6567,25 +7640,50 @@
end
_ACEOF
-if ac_fn_fc_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_fc_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+ { $as_echo "$as_me:$LINENO: result: yes" >&5
$as_echo "yes" >&6; }
else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+
+ { $as_echo "$as_me:$LINENO: result: no" >&5
$as_echo "no" >&6; }
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+ { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "cannot compile a trivial MPI program using $MPIFC
-See \`config.log' for more details" "$LINENO" 5; }
+{ { $as_echo "$as_me:$LINENO: error: cannot compile a trivial MPI program using $MPIFC
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: cannot compile a trivial MPI program using $MPIFC
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }; }
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
FC=$cit_fc_save_fc
@@ -6607,9 +7705,9 @@
ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_fc_compiler_gnu
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for alternate main to link with Fortran libraries" >&5
+{ $as_echo "$as_me:$LINENO: checking for alternate main to link with Fortran libraries" >&5
$as_echo_n "checking for alternate main to link with Fortran libraries... " >&6; }
-if test "${ac_cv_fc_main+set}" = set; then :
+if test "${ac_cv_fc_main+set}" = set; then
$as_echo_n "(cached) " >&6
else
ac_fc_m_save_LIBS=$LIBS
@@ -6623,7 +7721,11 @@
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#ifdef FC_DUMMY_MAIN_EQ_F77
# undef F77_DUMMY_MAIN
@@ -6648,30 +7750,81 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
mv conftest.$ac_objext cfortran_test.$ac_objext
else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "cannot compile a simple C program
-See \`config.log' for more details" "$LINENO" 5; }
+{ { $as_echo "$as_me:$LINENO: error: cannot compile a simple C program
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: cannot compile a simple C program
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }; }
fi
+
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
ac_ext=${ac_fc_srcext-f}
ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_fc_compiler_gnu
- cat > conftest.$ac_ext <<_ACEOF
+ cat >conftest.$ac_ext <<_ACEOF
subroutine foobar()
return
end
_ACEOF
-if ac_fn_fc_try_link "$LINENO"; then :
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_fc_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext && {
+ test "$cross_compiling" = yes ||
+ $as_test_x conftest$ac_exeext
+ }; then
ac_cv_fortran_main=$ac_func; break
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
rm -f cfortran_test* conftest*
done
ac_cv_fc_main=$ac_cv_fortran_main
@@ -6679,7 +7832,7 @@
LIBS=$ac_fc_m_save_LIBS
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_fc_main" >&5
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_fc_main" >&5
$as_echo "$ac_cv_fc_main" >&6; }
cat >>confdefs.h <<_ACEOF
@@ -6728,13 +7881,13 @@
case $ac_val in #(
*${as_nl}*)
case $ac_var in #(
- *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5
+ *_cv_*) { $as_echo "$as_me:$LINENO: WARNING: cache variable $ac_var contains a newline" >&5
$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;;
esac
case $ac_var in #(
_ | IFS | as_nl) ;; #(
BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #(
- *) { eval $ac_var=; unset $ac_var;} ;;
+ *) $as_unset $ac_var ;;
esac ;;
esac
done
@@ -6742,8 +7895,8 @@
(set) 2>&1 |
case $as_nl`(ac_space=' '; set) 2>&1` in #(
*${as_nl}ac_space=\ *)
- # `set' does not quote correctly, so add quotes: double-quote
- # substitution turns \\\\ into \\, and sed turns \\ into \.
+ # `set' does not quote correctly, so add quotes (double-quote
+ # substitution turns \\\\ into \\, and sed turns \\ into \).
sed -n \
"s/'/'\\\\''/g;
s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p"
@@ -6766,11 +7919,11 @@
if diff "$cache_file" confcache >/dev/null 2>&1; then :; else
if test -w "$cache_file"; then
test "x$cache_file" != "x/dev/null" &&
- { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5
+ { $as_echo "$as_me:$LINENO: updating cache $cache_file" >&5
$as_echo "$as_me: updating cache $cache_file" >&6;}
cat confcache >$cache_file
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5
+ { $as_echo "$as_me:$LINENO: not updating unwritable cache $cache_file" >&5
$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;}
fi
fi
@@ -6784,15 +7937,14 @@
ac_libobjs=
ac_ltlibobjs=
-U=
for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue
# 1. Remove the extension, and $U if already installed.
ac_script='s/\$U\././;s/\.o$//;s/\.obj$//'
ac_i=`$as_echo "$ac_i" | sed "$ac_script"`
# 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR
# will be set to the directory where LIBOBJS objects are built.
- as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext"
- as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo'
+ ac_libobjs="$ac_libobjs \${LIBOBJDIR}$ac_i\$U.$ac_objext"
+ ac_ltlibobjs="$ac_ltlibobjs \${LIBOBJDIR}$ac_i"'$U.lo'
done
LIBOBJS=$ac_libobjs
@@ -6800,22 +7952,34 @@
if test -z "${COND_PYRE_TRUE}" && test -z "${COND_PYRE_FALSE}"; then
- as_fn_error $? "conditional \"COND_PYRE\" was never defined.
-Usually this means the macro was only invoked conditionally." "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: conditional \"COND_PYRE\" was never defined.
+Usually this means the macro was only invoked conditionally." >&5
+$as_echo "$as_me: error: conditional \"COND_PYRE\" was never defined.
+Usually this means the macro was only invoked conditionally." >&2;}
+ { (exit 1); exit 1; }; }
fi
if test -z "${COND_MPI_TRUE}" && test -z "${COND_MPI_FALSE}"; then
- as_fn_error $? "conditional \"COND_MPI\" was never defined.
-Usually this means the macro was only invoked conditionally." "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: conditional \"COND_MPI\" was never defined.
+Usually this means the macro was only invoked conditionally." >&5
+$as_echo "$as_me: error: conditional \"COND_MPI\" was never defined.
+Usually this means the macro was only invoked conditionally." >&2;}
+ { (exit 1); exit 1; }; }
fi
+if test -z "${COND_CUDA_TRUE}" && test -z "${COND_CUDA_FALSE}"; then
+ { { $as_echo "$as_me:$LINENO: error: conditional \"COND_CUDA\" was never defined.
+Usually this means the macro was only invoked conditionally." >&5
+$as_echo "$as_me: error: conditional \"COND_CUDA\" was never defined.
+Usually this means the macro was only invoked conditionally." >&2;}
+ { (exit 1); exit 1; }; }
+fi
: ${CONFIG_STATUS=./config.status}
ac_write_fail=0
ac_clean_files_save=$ac_clean_files
ac_clean_files="$ac_clean_files $CONFIG_STATUS"
-{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5
+{ $as_echo "$as_me:$LINENO: creating $CONFIG_STATUS" >&5
$as_echo "$as_me: creating $CONFIG_STATUS" >&6;}
-as_write_fail=0
-cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1
+cat >$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
#! $SHELL
# Generated by $as_me.
# Run this file to recreate the current configuration.
@@ -6825,18 +7989,17 @@
debug=false
ac_cs_recheck=false
ac_cs_silent=false
-
SHELL=\${CONFIG_SHELL-$SHELL}
-export SHELL
-_ASEOF
-cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1
-## -------------------- ##
-## M4sh Initialization. ##
-## -------------------- ##
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+## --------------------- ##
+## M4sh Initialization. ##
+## --------------------- ##
+
# Be more Bourne compatible
DUALCASE=1; export DUALCASE # for MKS sh
-if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then :
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
emulate sh
NULLCMD=:
# Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
@@ -6844,15 +8007,23 @@
alias -g '${1+"$@"}'='"$@"'
setopt NO_GLOB_SUBST
else
- case `(set -o) 2>/dev/null` in #(
- *posix*) :
- set -o posix ;; #(
- *) :
- ;;
+ case `(set -o) 2>/dev/null` in
+ *posix*) set -o posix ;;
esac
+
fi
+
+
+# PATH needs CR
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
as_nl='
'
export as_nl
@@ -6860,13 +8031,7 @@
as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo
as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo
-# Prefer a ksh shell builtin over an external printf program on Solaris,
-# but without wasting forks for bash or zsh.
-if test -z "$BASH_VERSION$ZSH_VERSION" \
- && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then
- as_echo='print -r --'
- as_echo_n='print -rn --'
-elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then
+if (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then
as_echo='printf %s\n'
as_echo_n='printf %s'
else
@@ -6877,7 +8042,7 @@
as_echo_body='eval expr "X$1" : "X\\(.*\\)"'
as_echo_n_body='eval
arg=$1;
- case $arg in #(
+ case $arg in
*"$as_nl"*)
expr "X$arg" : "X\\(.*\\)$as_nl";
arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;;
@@ -6900,7 +8065,14 @@
}
fi
+# Support unset when possible.
+if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then
+ as_unset=unset
+else
+ as_unset=false
+fi
+
# IFS
# We need space, tab and new line, in precisely that order. Quoting is
# there to prevent editors from complaining about space-tab.
@@ -6909,15 +8081,15 @@
IFS=" "" $as_nl"
# Find who we are. Look in the path if we contain no directory separator.
-case $0 in #((
+case $0 in
*[\\/]* ) as_myself=$0 ;;
*) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
- test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
- done
+ test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+done
IFS=$as_save_IFS
;;
@@ -6929,16 +8101,12 @@
fi
if test ! -f "$as_myself"; then
$as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2
- exit 1
+ { (exit 1); exit 1; }
fi
-# Unset variables that we do not need and which cause bugs (e.g. in
-# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1"
-# suppresses any "Segmentation fault" message there. '((' could
-# trigger a bug in pdksh 5.2.14.
-for as_var in BASH_ENV ENV MAIL MAILPATH
-do eval test x\${$as_var+set} = xset \
- && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || :
+# Work around bugs in pre-3.0 UWIN ksh.
+for as_var in ENV MAIL MAILPATH
+do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var
done
PS1='$ '
PS2='> '
@@ -6950,89 +8118,7 @@
LANGUAGE=C
export LANGUAGE
-# CDPATH.
-(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
-
-
-# as_fn_error STATUS ERROR [LINENO LOG_FD]
-# ----------------------------------------
-# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
-# provided, also output the error to LOG_FD, referencing LINENO. Then exit the
-# script with STATUS, using 1 if that was 0.
-as_fn_error ()
-{
- as_status=$1; test $as_status -eq 0 && as_status=1
- if test "$4"; then
- as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4
- fi
- $as_echo "$as_me: error: $2" >&2
- as_fn_exit $as_status
-} # as_fn_error
-
-
-# as_fn_set_status STATUS
-# -----------------------
-# Set $? to STATUS, without forking.
-as_fn_set_status ()
-{
- return $1
-} # as_fn_set_status
-
-# as_fn_exit STATUS
-# -----------------
-# Exit the shell with STATUS, even in a "trap 0" or "set -e" context.
-as_fn_exit ()
-{
- set +e
- as_fn_set_status $1
- exit $1
-} # as_fn_exit
-
-# as_fn_unset VAR
-# ---------------
-# Portably unset VAR.
-as_fn_unset ()
-{
- { eval $1=; unset $1;}
-}
-as_unset=as_fn_unset
-# as_fn_append VAR VALUE
-# ----------------------
-# Append the text in VALUE to the end of the definition contained in VAR. Take
-# advantage of any shell optimizations that allow amortized linear growth over
-# repeated appends, instead of the typical quadratic growth present in naive
-# implementations.
-if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then :
- eval 'as_fn_append ()
- {
- eval $1+=\$2
- }'
-else
- as_fn_append ()
- {
- eval $1=\$$1\$2
- }
-fi # as_fn_append
-
-# as_fn_arith ARG...
-# ------------------
-# Perform arithmetic evaluation on the ARGs, and store the result in the
-# global $as_val. Take advantage of shells that can avoid forks. The arguments
-# must be portable across $(()) and expr.
-if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then :
- eval 'as_fn_arith ()
- {
- as_val=$(( $* ))
- }'
-else
- as_fn_arith ()
- {
- as_val=`expr "$@" || test $? -eq 1`
- }
-fi # as_fn_arith
-
-
+# Required to use basename.
if expr a : '\(a\)' >/dev/null 2>&1 &&
test "X`expr 00001 : '.*\(...\)'`" = X001; then
as_expr=expr
@@ -7046,12 +8132,8 @@
as_basename=false
fi
-if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
- as_dirname=dirname
-else
- as_dirname=false
-fi
+# Name of the executable.
as_me=`$as_basename -- "$0" ||
$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
X"$0" : 'X\(//\)$' \| \
@@ -7071,25 +8153,76 @@
}
s/.*/./; q'`
-# Avoid depending upon Character Ranges.
-as_cr_letters='abcdefghijklmnopqrstuvwxyz'
-as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
-as_cr_Letters=$as_cr_letters$as_cr_LETTERS
-as_cr_digits='0123456789'
-as_cr_alnum=$as_cr_Letters$as_cr_digits
+# CDPATH.
+$as_unset CDPATH
+
+
+ as_lineno_1=$LINENO
+ as_lineno_2=$LINENO
+ test "x$as_lineno_1" != "x$as_lineno_2" &&
+ test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || {
+
+ # Create $as_me.lineno as a copy of $as_myself, but with $LINENO
+ # uniformly replaced by the line number. The first 'sed' inserts a
+ # line-number line after each line using $LINENO; the second 'sed'
+ # does the real work. The second script uses 'N' to pair each
+ # line-number line with the line containing $LINENO, and appends
+ # trailing '-' during substitution so that $LINENO is not a special
+ # case at line end.
+ # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the
+ # scripts with optimization help from Paolo Bonzini. Blame Lee
+ # E. McMahon (1931-1989) for sed's syntax. :-)
+ sed -n '
+ p
+ /[$]LINENO/=
+ ' <$as_myself |
+ sed '
+ s/[$]LINENO.*/&-/
+ t lineno
+ b
+ :lineno
+ N
+ :loop
+ s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/
+ t loop
+ s/-\n.*//
+ ' >$as_me.lineno &&
+ chmod +x "$as_me.lineno" ||
+ { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2
+ { (exit 1); exit 1; }; }
+
+ # Don't try to exec as it changes $[0], causing all sort of problems
+ # (the dirname of $[0] is not the place where we might find the
+ # original and so on. Autoconf is especially sensitive to this).
+ . "./$as_me.lineno"
+ # Exit status is that of the last command.
+ exit
+}
+
+
+if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
+ as_dirname=dirname
+else
+ as_dirname=false
+fi
+
ECHO_C= ECHO_N= ECHO_T=
-case `echo -n x` in #(((((
+case `echo -n x` in
-n*)
- case `echo 'xy\c'` in
+ case `echo 'x\c'` in
*c*) ECHO_T=' ';; # ECHO_T is single tab character.
- xy) ECHO_C='\c';;
- *) echo `echo ksh88 bug on AIX 6.1` > /dev/null
- ECHO_T=' ';;
+ *) ECHO_C='\c';;
esac;;
*)
ECHO_N='-n';;
esac
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+ test "X`expr 00001 : '.*\(...\)'`" = X001; then
+ as_expr=expr
+else
+ as_expr=false
+fi
rm -f conf$$ conf$$.exe conf$$.file
if test -d conf$$.dir; then
@@ -7118,56 +8251,8 @@
rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
rmdir conf$$.dir 2>/dev/null
-
-# as_fn_mkdir_p
-# -------------
-# Create "$as_dir" as a directory, including parents if necessary.
-as_fn_mkdir_p ()
-{
-
- case $as_dir in #(
- -*) as_dir=./$as_dir;;
- esac
- test -d "$as_dir" || eval $as_mkdir_p || {
- as_dirs=
- while :; do
- case $as_dir in #(
- *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'(
- *) as_qdir=$as_dir;;
- esac
- as_dirs="'$as_qdir' $as_dirs"
- as_dir=`$as_dirname -- "$as_dir" ||
-$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$as_dir" : 'X\(//\)[^/]' \| \
- X"$as_dir" : 'X\(//\)$' \| \
- X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
-$as_echo X"$as_dir" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
- s//\1/
- q
- }
- /^X\(\/\/\)[^/].*/{
- s//\1/
- q
- }
- /^X\(\/\/\)$/{
- s//\1/
- q
- }
- /^X\(\/\).*/{
- s//\1/
- q
- }
- s/.*/./; q'`
- test -d "$as_dir" && break
- done
- test -z "$as_dirs" || eval "mkdir $as_dirs"
- } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir"
-
-
-} # as_fn_mkdir_p
if mkdir -p . 2>/dev/null; then
- as_mkdir_p='mkdir -p "$as_dir"'
+ as_mkdir_p=:
else
test -d ./-p && rmdir ./-p
as_mkdir_p=false
@@ -7186,10 +8271,10 @@
if test -d "$1"; then
test -d "$1/.";
else
- case $1 in #(
+ case $1 in
-*)set "./$1";;
esac;
- case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in #((
+ case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in
???[sx]*):;;*)false;;esac;fi
'\'' sh
'
@@ -7204,19 +8289,13 @@
exec 6>&1
-## ----------------------------------- ##
-## Main body of $CONFIG_STATUS script. ##
-## ----------------------------------- ##
-_ASEOF
-test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1
-cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
-# Save the log message, to keep $0 and so on meaningful, and to
+# Save the log message, to keep $[0] and so on meaningful, and to
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by Specfem 3D $as_me 2.0.1, which was
-generated by GNU Autoconf 2.66. Invocation command line was
+generated by GNU Autoconf 2.63. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
CONFIG_HEADERS = $CONFIG_HEADERS
@@ -7247,15 +8326,13 @@
cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
ac_cs_usage="\
-\`$as_me' instantiates files and other configuration actions
-from templates according to the current configuration. Unless the files
-and actions are specified as TAGs, all are instantiated by default.
+\`$as_me' instantiates files from templates according to the
+current configuration.
-Usage: $0 [OPTION]... [TAG]...
+Usage: $0 [OPTION]... [FILE]...
-h, --help print this help, then exit
-V, --version print version number and configuration settings, then exit
- --config print configuration, then exit
-q, --quiet, --silent
do not print progress messages
-d, --debug don't remove temporary files
@@ -7271,17 +8348,16 @@
Configuration headers:
$config_headers
-Report bugs to <jtromp AT princeton.edu>."
+Report bugs to <bug-autoconf at gnu.org>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
-ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
Specfem 3D config.status 2.0.1
-configured by $0, generated by GNU Autoconf 2.66,
- with options \\"\$ac_cs_config\\"
+configured by $0, generated by GNU Autoconf 2.63,
+ with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
-Copyright (C) 2010 Free Software Foundation, Inc.
+Copyright (C) 2008 Free Software Foundation, Inc.
This config.status script is free software; the Free Software Foundation
gives unlimited permission to copy, distribute and modify it."
@@ -7314,8 +8390,6 @@
ac_cs_recheck=: ;;
--version | --versio | --versi | --vers | --ver | --ve | --v | -V )
$as_echo "$ac_cs_version"; exit ;;
- --config | --confi | --conf | --con | --co | --c )
- $as_echo "$ac_cs_config"; exit ;;
--debug | --debu | --deb | --de | --d | -d )
debug=: ;;
--file | --fil | --fi | --f )
@@ -7323,19 +8397,20 @@
case $ac_optarg in
*\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;;
esac
- as_fn_append CONFIG_FILES " '$ac_optarg'"
+ CONFIG_FILES="$CONFIG_FILES '$ac_optarg'"
ac_need_defaults=false;;
--header | --heade | --head | --hea )
$ac_shift
case $ac_optarg in
*\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;;
esac
- as_fn_append CONFIG_HEADERS " '$ac_optarg'"
+ CONFIG_HEADERS="$CONFIG_HEADERS '$ac_optarg'"
ac_need_defaults=false;;
--he | --h)
# Conflict between --help and --header
- as_fn_error $? "ambiguous option: \`$1'
-Try \`$0 --help' for more information.";;
+ { $as_echo "$as_me: error: ambiguous option: $1
+Try \`$0 --help' for more information." >&2
+ { (exit 1); exit 1; }; };;
--help | --hel | -h )
$as_echo "$ac_cs_usage"; exit ;;
-q | -quiet | --quiet | --quie | --qui | --qu | --q \
@@ -7343,10 +8418,11 @@
ac_cs_silent=: ;;
# This is an error.
- -*) as_fn_error $? "unrecognized option: \`$1'
-Try \`$0 --help' for more information." ;;
+ -*) { $as_echo "$as_me: error: unrecognized option: $1
+Try \`$0 --help' for more information." >&2
+ { (exit 1); exit 1; }; } ;;
- *) as_fn_append ac_config_targets " $1"
+ *) ac_config_targets="$ac_config_targets $1"
ac_need_defaults=false ;;
esac
@@ -7408,7 +8484,9 @@
"src/decompose_mesh_SCOTCH/scotch_5.1.11/src/Makefile.inc") CONFIG_FILES="$CONFIG_FILES src/decompose_mesh_SCOTCH/scotch_5.1.11/src/Makefile.inc" ;;
"src/check_mesh_quality_CUBIT_Abaqus/Makefile") CONFIG_FILES="$CONFIG_FILES src/check_mesh_quality_CUBIT_Abaqus/Makefile" ;;
- *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
+ *) { { $as_echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5
+$as_echo "$as_me: error: invalid argument: $ac_config_target" >&2;}
+ { (exit 1); exit 1; }; };;
esac
done
@@ -7434,7 +8512,7 @@
trap 'exit_status=$?
{ test -z "$tmp" || test ! -d "$tmp" || rm -fr "$tmp"; } && exit $exit_status
' 0
- trap 'as_fn_exit 1' 1 2 13 15
+ trap '{ (exit 1); exit 1; }' 1 2 13 15
}
# Create a (secure) tmp directory for tmp files.
@@ -7445,7 +8523,11 @@
{
tmp=./conf$$-$RANDOM
(umask 077 && mkdir "$tmp")
-} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5
+} ||
+{
+ $as_echo "$as_me: cannot create a temporary directory in ." >&2
+ { (exit 1); exit 1; }
+}
# Set up the scripts for CONFIG_FILES section.
# No need to generate them if there are no CONFIG_FILES.
@@ -7453,13 +8535,8 @@
if test -n "$CONFIG_FILES"; then
-ac_cr=`echo X | tr X '\015'`
-# On cygwin, bash can eat \r inside `` if the user requested igncr.
-# But we know of no other shell where ac_cr would be empty at this
-# point, so we can use a bashism as a fallback.
-if test "x$ac_cr" = x; then
- eval ac_cr=\$\'\\r\'
-fi
+ac_cr='
+'
ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' </dev/null 2>/dev/null`
if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then
ac_cs_awk_cr='\\r'
@@ -7476,18 +8553,24 @@
echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' &&
echo "_ACEOF"
} >conf$$subs.sh ||
- as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
-ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'`
+ { { $as_echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
+$as_echo "$as_me: error: could not make $CONFIG_STATUS" >&2;}
+ { (exit 1); exit 1; }; }
+ac_delim_num=`echo "$ac_subst_vars" | grep -c '$'`
ac_delim='%!_!# '
for ac_last_try in false false false false false :; do
. ./conf$$subs.sh ||
- as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
+$as_echo "$as_me: error: could not make $CONFIG_STATUS" >&2;}
+ { (exit 1); exit 1; }; }
ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X`
if test $ac_delim_n = $ac_delim_num; then
break
elif $ac_last_try; then
- as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
+$as_echo "$as_me: error: could not make $CONFIG_STATUS" >&2;}
+ { (exit 1); exit 1; }; }
else
ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
fi
@@ -7509,7 +8592,7 @@
t delim
:nl
h
-s/\(.\{148\}\)..*/\1/
+s/\(.\{148\}\).*/\1/
t more1
s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/
p
@@ -7523,7 +8606,7 @@
t nl
:delim
h
-s/\(.\{148\}\)..*/\1/
+s/\(.\{148\}\).*/\1/
t more2
s/["\\]/\\&/g; s/^/"/; s/$/"/
p
@@ -7576,28 +8659,22 @@
else
cat
fi < "$tmp/subs1.awk" > "$tmp/subs.awk" \
- || as_fn_error $? "could not setup config files machinery" "$LINENO" 5
+ || { { $as_echo "$as_me:$LINENO: error: could not setup config files machinery" >&5
+$as_echo "$as_me: error: could not setup config files machinery" >&2;}
+ { (exit 1); exit 1; }; }
_ACEOF
-# VPATH may cause trouble with some makes, so we remove sole $(srcdir),
-# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and
+# VPATH may cause trouble with some makes, so we remove $(srcdir),
+# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and
# trailing colons and then remove the whole line if VPATH becomes empty
# (actually we leave an empty line to preserve line numbers).
if test "x$srcdir" = x.; then
- ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{
-h
-s///
-s/^/:/
-s/[ ]*$/:/
-s/:\$(srcdir):/:/g
-s/:\${srcdir}:/:/g
-s/:@srcdir@:/:/g
-s/^:*//
+ ac_vpsub='/^[ ]*VPATH[ ]*=/{
+s/:*\$(srcdir):*/:/
+s/:*\${srcdir}:*/:/
+s/:*@srcdir@:*/:/
+s/^\([^=]*=[ ]*\):*/\1/
s/:*$//
-x
-s/\(=[ ]*\).*/\1/
-G
-s/\n//
s/^[^=]*=[ ]*$//
}'
fi
@@ -7625,7 +8702,9 @@
if test -z "$ac_t"; then
break
elif $ac_last_try; then
- as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: could not make $CONFIG_HEADERS" >&5
+$as_echo "$as_me: error: could not make $CONFIG_HEADERS" >&2;}
+ { (exit 1); exit 1; }; }
else
ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
fi
@@ -7710,7 +8789,9 @@
_ACAWK
_ACEOF
cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
- as_fn_error $? "could not setup config headers machinery" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: could not setup config headers machinery" >&5
+$as_echo "$as_me: error: could not setup config headers machinery" >&2;}
+ { (exit 1); exit 1; }; }
fi # test -n "$CONFIG_HEADERS"
@@ -7723,7 +8804,9 @@
esac
case $ac_mode$ac_tag in
:[FHL]*:*);;
- :L* | :C*:*) as_fn_error $? "invalid tag \`$ac_tag'" "$LINENO" 5;;
+ :L* | :C*:*) { { $as_echo "$as_me:$LINENO: error: invalid tag $ac_tag" >&5
+$as_echo "$as_me: error: invalid tag $ac_tag" >&2;}
+ { (exit 1); exit 1; }; };;
:[FH]-) ac_tag=-:-;;
:[FH]*) ac_tag=$ac_tag:$ac_tag.in;;
esac
@@ -7751,10 +8834,12 @@
[\\/$]*) false;;
*) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";;
esac ||
- as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;;
+ { { $as_echo "$as_me:$LINENO: error: cannot find input file: $ac_f" >&5
+$as_echo "$as_me: error: cannot find input file: $ac_f" >&2;}
+ { (exit 1); exit 1; }; };;
esac
case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac
- as_fn_append ac_file_inputs " '$ac_f'"
+ ac_file_inputs="$ac_file_inputs '$ac_f'"
done
# Let's still pretend it is `configure' which instantiates (i.e., don't
@@ -7765,7 +8850,7 @@
`' by configure.'
if test x"$ac_file" != x-; then
configure_input="$ac_file. $configure_input"
- { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5
+ { $as_echo "$as_me:$LINENO: creating $ac_file" >&5
$as_echo "$as_me: creating $ac_file" >&6;}
fi
# Neutralize special characters interpreted by sed in replacement strings.
@@ -7778,7 +8863,9 @@
case $ac_tag in
*:-:* | *:-) cat >"$tmp/stdin" \
- || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;;
+ || { { $as_echo "$as_me:$LINENO: error: could not create $ac_file" >&5
+$as_echo "$as_me: error: could not create $ac_file" >&2;}
+ { (exit 1); exit 1; }; } ;;
esac
;;
esac
@@ -7806,7 +8893,47 @@
q
}
s/.*/./; q'`
- as_dir="$ac_dir"; as_fn_mkdir_p
+ { as_dir="$ac_dir"
+ case $as_dir in #(
+ -*) as_dir=./$as_dir;;
+ esac
+ test -d "$as_dir" || { $as_mkdir_p && mkdir -p "$as_dir"; } || {
+ as_dirs=
+ while :; do
+ case $as_dir in #(
+ *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'(
+ *) as_qdir=$as_dir;;
+ esac
+ as_dirs="'$as_qdir' $as_dirs"
+ as_dir=`$as_dirname -- "$as_dir" ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$as_dir" : 'X\(//\)[^/]' \| \
+ X"$as_dir" : 'X\(//\)$' \| \
+ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$as_dir" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)[^/].*/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\).*/{
+ s//\1/
+ q
+ }
+ s/.*/./; q'`
+ test -d "$as_dir" && break
+ done
+ test -z "$as_dirs" || eval "mkdir $as_dirs"
+ } || test -d "$as_dir" || { { $as_echo "$as_me:$LINENO: error: cannot create directory $as_dir" >&5
+$as_echo "$as_me: error: cannot create directory $as_dir" >&2;}
+ { (exit 1); exit 1; }; }; }
ac_builddir=.
case "$ac_dir" in
@@ -7854,6 +8981,7 @@
# If the template does not know about datarootdir, expand it.
# FIXME: This hack should be removed a few years after 2.60.
ac_datarootdir_hack=; ac_datarootdir_seen=
+
ac_sed_dataroot='
/datarootdir/ {
p
@@ -7863,11 +8991,12 @@
/@docdir@/p
/@infodir@/p
/@localedir@/p
-/@mandir@/p'
+/@mandir@/p
+'
case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in
*datarootdir*) ac_datarootdir_seen=yes;;
*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*)
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5
+ { $as_echo "$as_me:$LINENO: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5
$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;}
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
@@ -7877,7 +9006,7 @@
s&@infodir@&$infodir&g
s&@localedir@&$localedir&g
s&@mandir@&$mandir&g
- s&\\\${datarootdir}&$datarootdir&g' ;;
+ s&\\\${datarootdir}&$datarootdir&g' ;;
esac
_ACEOF
@@ -7904,22 +9033,26 @@
$ac_datarootdir_hack
"
eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$tmp/subs.awk" >$tmp/out \
- || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+ || { { $as_echo "$as_me:$LINENO: error: could not create $ac_file" >&5
+$as_echo "$as_me: error: could not create $ac_file" >&2;}
+ { (exit 1); exit 1; }; }
test -z "$ac_datarootdir_hack$ac_datarootdir_seen" &&
{ ac_out=`sed -n '/\${datarootdir}/p' "$tmp/out"`; test -n "$ac_out"; } &&
{ ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' "$tmp/out"`; test -z "$ac_out"; } &&
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir'
-which seems to be undefined. Please make sure it is defined" >&5
+ { $as_echo "$as_me:$LINENO: WARNING: $ac_file contains a reference to the variable \`datarootdir'
+which seems to be undefined. Please make sure it is defined." >&5
$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir'
-which seems to be undefined. Please make sure it is defined" >&2;}
+which seems to be undefined. Please make sure it is defined." >&2;}
rm -f "$tmp/stdin"
case $ac_file in
-) cat "$tmp/out" && rm -f "$tmp/out";;
*) rm -f "$ac_file" && mv "$tmp/out" "$ac_file";;
esac \
- || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+ || { { $as_echo "$as_me:$LINENO: error: could not create $ac_file" >&5
+$as_echo "$as_me: error: could not create $ac_file" >&2;}
+ { (exit 1); exit 1; }; }
;;
:H)
#
@@ -7930,19 +9063,25 @@
$as_echo "/* $configure_input */" \
&& eval '$AWK -f "$tmp/defines.awk"' "$ac_file_inputs"
} >"$tmp/config.h" \
- || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+ || { { $as_echo "$as_me:$LINENO: error: could not create $ac_file" >&5
+$as_echo "$as_me: error: could not create $ac_file" >&2;}
+ { (exit 1); exit 1; }; }
if diff "$ac_file" "$tmp/config.h" >/dev/null 2>&1; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5
+ { $as_echo "$as_me:$LINENO: $ac_file is unchanged" >&5
$as_echo "$as_me: $ac_file is unchanged" >&6;}
else
rm -f "$ac_file"
mv "$tmp/config.h" "$ac_file" \
- || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+ || { { $as_echo "$as_me:$LINENO: error: could not create $ac_file" >&5
+$as_echo "$as_me: error: could not create $ac_file" >&2;}
+ { (exit 1); exit 1; }; }
fi
else
$as_echo "/* $configure_input */" \
&& eval '$AWK -f "$tmp/defines.awk"' "$ac_file_inputs" \
- || as_fn_error $? "could not create -" "$LINENO" 5
+ || { { $as_echo "$as_me:$LINENO: error: could not create -" >&5
+$as_echo "$as_me: error: could not create -" >&2;}
+ { (exit 1); exit 1; }; }
fi
;;
@@ -7952,12 +9091,15 @@
done # for ac_tag
-as_fn_exit 0
+{ (exit 0); exit 0; }
_ACEOF
+chmod +x $CONFIG_STATUS
ac_clean_files=$ac_clean_files_save
test $ac_write_fail = 0 ||
- as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5
+ { { $as_echo "$as_me:$LINENO: error: write failure creating $CONFIG_STATUS" >&5
+$as_echo "$as_me: error: write failure creating $CONFIG_STATUS" >&2;}
+ { (exit 1); exit 1; }; }
# configure is writing to config.log, and then calls config.status.
@@ -7978,10 +9120,10 @@
exec 5>>config.log
# Use ||, not &&, to avoid exiting from the if with $? = 1, which
# would make configure fail if this is the last instruction.
- $ac_cs_success || as_fn_exit 1
+ $ac_cs_success || { (exit 1); exit 1; }
fi
if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5
+ { $as_echo "$as_me:$LINENO: WARNING: unrecognized options: $ac_unrecognized_opts" >&5
$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;}
fi
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/homogeneous_halfspace/in_data_files/Par_file
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/homogeneous_halfspace/in_data_files/Par_file 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/homogeneous_halfspace/in_data_files/Par_file 2011-10-06 03:31:24 UTC (rev 19027)
@@ -52,4 +52,6 @@
# print source time function
PRINT_SOURCE_TIME_FUNCTION = .false.
+# set to true to use GPUs
+GPU_MODE = .false.
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/layered_halfspace/in_data_files/Par_file
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/layered_halfspace/in_data_files/Par_file 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/layered_halfspace/in_data_files/Par_file 2011-10-06 03:31:24 UTC (rev 19027)
@@ -52,4 +52,6 @@
# print source time function
PRINT_SOURCE_TIME_FUNCTION = .false.
+# set to true to use GPUs
+GPU_MODE = .false.
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/meshfem3D_examples/many_interfaces/Par_file
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/meshfem3D_examples/many_interfaces/Par_file 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/meshfem3D_examples/many_interfaces/Par_file 2011-10-06 03:31:24 UTC (rev 19027)
@@ -52,4 +52,6 @@
# print source time function
PRINT_SOURCE_TIME_FUNCTION = .false.
+# set to true to use GPUs
+GPU_MODE = .false.
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/meshfem3D_examples/simple_model/Par_file
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/meshfem3D_examples/simple_model/Par_file 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/meshfem3D_examples/simple_model/Par_file 2011-10-06 03:31:24 UTC (rev 19027)
@@ -52,4 +52,6 @@
# print source time function
PRINT_SOURCE_TIME_FUNCTION = .false.
+# set to true to use GPUs
+GPU_MODE = .false.
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/tomographic_model/in_data_files/Par_file
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/tomographic_model/in_data_files/Par_file 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/tomographic_model/in_data_files/Par_file 2011-10-06 03:31:24 UTC (rev 19027)
@@ -52,4 +52,6 @@
# print source time function
PRINT_SOURCE_TIME_FUNCTION = .false.
+# set to true to use GPUs
+GPU_MODE = .false.
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/waterlayered_halfspace/in_data_files/Par_file
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/waterlayered_halfspace/in_data_files/Par_file 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/waterlayered_halfspace/in_data_files/Par_file 2011-10-06 03:31:24 UTC (rev 19027)
@@ -53,4 +53,6 @@
# print source time function
PRINT_SOURCE_TIME_FUNCTION = .false.
+# set to true to use GPUs
+GPU_MODE = .false.
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/in_data_files/Par_file.in
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/in_data_files/Par_file.in 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/in_data_files/Par_file.in 2011-10-06 03:31:24 UTC (rev 19027)
@@ -52,4 +52,6 @@
# print source time function
PRINT_SOURCE_TIME_FUNCTION = .false.
+# set to true to use GPUs
+GPU_MODE = .false.
Added: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/check_mesh_quality_CUBIT_Abaqus/constants.h
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/check_mesh_quality_CUBIT_Abaqus/constants.h (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/check_mesh_quality_CUBIT_Abaqus/constants.h 2011-10-06 03:31:24 UTC (rev 19027)
@@ -0,0 +1,37 @@
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+! number of GLL points in each direction of an element (degree plus one)
+ integer, parameter :: NGLLX = 5
+ integer, parameter :: NGLLY = NGLLX
+ integer, parameter :: NGLLZ = NGLLX
+
+! very large and very small values
+ double precision, parameter :: HUGEVAL = 1.d+30,TINYVAL = 1.d-9
+
+! some useful constants
+ double precision, parameter :: PI = 3.141592653589793d0
+
Added: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/check_fields_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/check_fields_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/check_fields_cuda.cu 2011-10-06 03:31:24 UTC (rev 19027)
@@ -0,0 +1,418 @@
+#include <stdio.h>
+#include <cuda.h>
+#include <cublas.h>
+#include <mpi.h>
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+#include "prepare_constants_cuda.h"
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Check functions
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(check_max_norm_displ_gpu,
+ CHECK_MAX_NORM_DISPL_GPU)(int* size, float* displ,long* Mesh_pointer_f,int* announceID) {
+
+TRACE("check_max_norm_displ_gpu");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ cudaMemcpy(displ, mp->d_displ,*size*sizeof(float),cudaMemcpyDeviceToHost);
+ float maxnorm=0;
+
+ for(int i=0;i<*size;i++) {
+ maxnorm = MAX(maxnorm,fabsf(displ[i]));
+ }
+ printf("%d: maxnorm of forward displ = %e\n",*announceID,maxnorm);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(check_max_norm_vector,
+ CHECK_MAX_NORM_VECTOR)(int* size, float* vector1, int* announceID) {
+
+TRACE("check_max_norm_vector");
+
+ int procid;
+ MPI_Comm_rank(MPI_COMM_WORLD,&procid);
+ float maxnorm=0;
+ int maxloc;
+ for(int i=0;i<*size;i++) {
+ if(maxnorm<fabsf(vector1[i])) {
+ maxnorm = vector1[i];
+ maxloc = i;
+ }
+ }
+ printf("%d:maxnorm of vector %d [%d] = %e\n",procid,*announceID,maxloc,maxnorm);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(check_max_norm_displ,
+ CHECK_MAX_NORM_DISPL)(int* size, float* displ, int* announceID) {
+
+TRACE("check_max_norm_displ");
+
+ float maxnorm=0;
+
+ for(int i=0;i<*size;i++) {
+ maxnorm = MAX(maxnorm,fabsf(displ[i]));
+ }
+ printf("%d: maxnorm of forward displ = %e\n",*announceID,maxnorm);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(check_max_norm_b_displ_gpu,
+ CHECK_MAX_NORM_B_DISPL_GPU)(int* size, float* b_displ,long* Mesh_pointer_f,int* announceID) {
+
+TRACE("check_max_norm_b_displ_gpu");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ float* b_accel = (float*)malloc(*size*sizeof(float));
+
+ cudaMemcpy(b_displ, mp->d_b_displ,*size*sizeof(float),cudaMemcpyDeviceToHost);
+ cudaMemcpy(b_accel, mp->d_b_accel,*size*sizeof(float),cudaMemcpyDeviceToHost);
+
+ float maxnorm=0;
+ float maxnorm_accel=0;
+
+ for(int i=0;i<*size;i++) {
+ maxnorm = MAX(maxnorm,fabsf(b_displ[i]));
+ maxnorm_accel = MAX(maxnorm,fabsf(b_accel[i]));
+ }
+ free(b_accel);
+ printf("%d: maxnorm of backward displ = %e\n",*announceID,maxnorm);
+ printf("%d: maxnorm of backward accel = %e\n",*announceID,maxnorm_accel);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(check_max_norm_b_accel_gpu,
+ CHECK_MAX_NORM_B_ACCEL_GPU)(int* size, float* b_accel,long* Mesh_pointer_f,int* announceID) {
+
+TRACE("check_max_norm_b_accel_gpu");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ cudaMemcpy(b_accel, mp->d_b_accel,*size*sizeof(float),cudaMemcpyDeviceToHost);
+
+ float maxnorm=0;
+
+ for(int i=0;i<*size;i++) {
+ maxnorm = MAX(maxnorm,fabsf(b_accel[i]));
+ }
+ printf("%d: maxnorm of backward accel = %e\n",*announceID,maxnorm);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(check_max_norm_b_veloc_gpu,
+ CHECK_MAX_NORM_B_VELOC_GPU)(int* size, float* b_veloc,long* Mesh_pointer_f,int* announceID) {
+
+TRACE("check_max_norm_b_veloc_gpu");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ cudaMemcpy(b_veloc, mp->d_b_veloc,*size*sizeof(float),cudaMemcpyDeviceToHost);
+
+ float maxnorm=0;
+
+ for(int i=0;i<*size;i++) {
+ maxnorm = MAX(maxnorm,fabsf(b_veloc[i]));
+ }
+ printf("%d: maxnorm of backward veloc = %e\n",*announceID,maxnorm);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(check_max_norm_b_displ,
+ CHECK_MAX_NORM_B_DISPL)(int* size, float* b_displ,int* announceID) {
+
+TRACE("check_max_norm_b_displ");
+
+ float maxnorm=0;
+
+ for(int i=0;i<*size;i++) {
+ maxnorm = MAX(maxnorm,fabsf(b_displ[i]));
+ }
+ printf("%d:maxnorm of backward displ = %e\n",*announceID,maxnorm);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(check_max_norm_b_accel,
+ CHECK_MAX_NORM_B_ACCEL)(int* size, float* b_accel,int* announceID) {
+
+TRACE("check_max_norm_b_accel");
+
+ float maxnorm=0;
+
+ for(int i=0;i<*size;i++) {
+ maxnorm = MAX(maxnorm,fabsf(b_accel[i]));
+ }
+ printf("%d:maxnorm of backward accel = %e\n",*announceID,maxnorm);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(check_error_vectors,
+ CHECK_ERROR_VECTORS)(int* sizef, float* vector1,float* vector2) {
+
+TRACE("check_error_vectors");
+
+ int size = *sizef;
+
+ double diff2 = 0;
+ double sum = 0;
+ double temp;
+ double maxerr=0;
+ int maxerrorloc;
+
+ for(int i=0;i<size;++i) {
+ temp = vector1[i]-vector2[i];
+ diff2 += temp*temp;
+ sum += vector1[i]*vector1[i];
+ if(maxerr < fabsf(temp)) {
+ maxerr = abs(temp);
+ maxerrorloc = i;
+ }
+ }
+
+ printf("rel error = %f, maxerr = %e @ %d\n",diff2/sum,maxerr,maxerrorloc);
+ int myrank;
+ MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
+ if(myrank==0) {
+ for(int i=maxerrorloc;i>maxerrorloc-5;i--) {
+ printf("[%d]: %e vs. %e\n",i,vector1[i],vector2[i]);
+ }
+ }
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Auxiliary functions
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(get_max_accel,
+ GET_MAX_ACCEL)(int* itf,int* sizef,long* Mesh_pointer) {
+
+TRACE("get_max_accel");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer);
+ int procid;
+ MPI_Comm_rank(MPI_COMM_WORLD,&procid);
+ int size = *sizef;
+ int it = *itf;
+ float* accel_cpy = (float*)malloc(size*sizeof(float));
+ cudaMemcpy(accel_cpy,mp->d_accel,size*sizeof(float),cudaMemcpyDeviceToHost);
+ float maxval=0;
+ for(int i=0;i<size;++i) {
+ maxval = MAX(maxval,accel_cpy[i]);
+ }
+ printf("%d/%d: max=%e\n",it,procid,maxval);
+ free(accel_cpy);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+__global__ void get_maximum_kernel(float* array, int size, float* d_max){
+
+ /* simplest version: uses only 1 thread
+ float max;
+ max = 0;
+ // finds maximum value in array
+ if( size > 0 ){
+ max = abs(array[0]);
+ for( int i=1; i < size; i++){
+ if( abs(array[i]) > max ) max = abs(array[i]);
+ }
+ }
+ *d_max = max;
+ */
+
+ // reduction example:
+ __shared__ float sdata[256] ;
+
+ // load shared mem
+ unsigned int tid = threadIdx.x;
+ unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
+
+ // loads absolute values into shared memory
+ sdata[tid] = (i < size) ? fabs(array[i]) : 0.0 ;
+
+ __syncthreads();
+
+ // do reduction in shared mem
+ for(unsigned int s=blockDim.x/2; s>0; s>>=1)
+ {
+ if (tid < s){
+ // summation:
+ //sdata[tid] += sdata[tid + s];
+ // maximum:
+ if( sdata[tid] < sdata[tid + s] ) sdata[tid] = sdata[tid + s];
+ }
+ __syncthreads();
+ }
+
+ // write result for this block to global mem
+ if (tid == 0) d_max[blockIdx.x] = sdata[0];
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(get_norm_acoustic_from_device_cuda,
+ GET_NORM_ACOUSTIC_FROM_DEVICE_CUDA)(float* norm,
+ long* Mesh_pointer_f,
+ int* SIMULATION_TYPE) {
+
+TRACE("get_norm_acoustic_from_device_cuda");
+ //double start_time = get_time();
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+ float max;
+ float *d_max;
+
+
+
+ max = 0;
+
+ /* way 1 : timing Elapsed time: 8.464813e-03
+ float* h_array;
+ h_array = (float*)calloc(mp->NGLOB_AB,sizeof(float));
+
+ print_CUDA_error_if_any(cudaMemcpy(h_array,mp->d_potential_dot_dot_acoustic,
+ sizeof(float)*(mp->NGLOB_AB),cudaMemcpyDeviceToHost),131);
+
+ // finds maximum value in array
+ max = h_array[0];
+ for( int i=1; i < mp->NGLOB_AB; i++){
+ if( abs(h_array[i]) > max ) max = abs(h_array[i]);
+ }
+ free(h_array);
+ */
+
+ /* way 2: timing Elapsed time: 8.818102e-02
+ // launch simple kernel
+ cudaMalloc((void**)&d_max,sizeof(float));
+
+ dim3 grid(1,1);
+ dim3 threads(1,1,1);
+
+ get_maximum_kernel<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
+ mp->NGLOB_AB,
+ d_max);
+ print_CUDA_error_if_any(cudaMemcpy(&max,d_max, sizeof(float), cudaMemcpyDeviceToHost),222);
+
+ cudaFree(d_max);
+ */
+
+ // way 2 b: timing Elapsed time: 1.236916e-03
+ // launch simple reduction kernel
+ float* h_max;
+ int blocksize = 256;
+
+ int num_blocks_x = ceil(mp->NGLOB_AB/blocksize);
+ //printf("num_blocks_x %i \n",num_blocks_x);
+
+ h_max = (float*) calloc(num_blocks_x,sizeof(float));
+ cudaMalloc((void**)&d_max,num_blocks_x*sizeof(float));
+
+ dim3 grid(num_blocks_x,1);
+ dim3 threads(blocksize,1,1);
+
+ if(*SIMULATION_TYPE == 1 ){
+ get_maximum_kernel<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
+ mp->NGLOB_AB,
+ d_max);
+ }
+
+ if(*SIMULATION_TYPE == 3 ){
+ get_maximum_kernel<<<grid,threads>>>(mp->d_b_potential_dot_dot_acoustic,
+ mp->NGLOB_AB,
+ d_max);
+ }
+
+ print_CUDA_error_if_any(cudaMemcpy(h_max,d_max,num_blocks_x*sizeof(float),cudaMemcpyDeviceToHost),222);
+
+ // determines max for all blocks
+ max = h_max[0];
+ for(int i=1;i<num_blocks_x;i++) {
+ if( max < h_max[i]) max = h_max[i];
+ }
+
+ cudaFree(d_max);
+ free(h_max);
+
+ /* way 3: doesn't work properly...
+ cublasStatus status;
+
+ // Initialize CUBLAS
+ status = cublasInit();
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! CUBLAS initialization error\n");
+ exit(1);
+ }
+
+ // cublas function: cublasIsamax
+ // finds the smallest index of the maximum magnitude element of single
+ // precision vector x
+ int incr = 1;
+ int imax = 0;
+ imax = cublasIsamax(mp->NGLOB_AB,(float*)mp->d_potential_dot_dot_acoustic, incr);
+ status= cublasGetError();
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! CUBLAS error in cublasIsamax\n");
+ exit(1);
+ }
+
+ print_CUDA_error_if_any(cudaMemcpy(&max,&(mp->d_potential_dot_dot_acoustic[imax]), sizeof(float), cudaMemcpyDeviceToHost),222);
+
+ printf("maximum %i %i %f \n",mp->NGLOB_AB,imax,max);
+
+ // Shutdown
+ status = cublasShutdown();
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ fprintf (stderr, "!!!! shutdown error (A)\n");
+ exit(1);
+ }
+
+ */
+
+ // return result
+ *norm = max;
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ //double end_time = get_time();
+ //printf("Elapsed time: %e\n",end_time-start_time);
+ exit_on_cuda_error("after get_maximum_kernel");
+#endif
+}
+
+
Copied: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_add_sources_cuda.cu (from rev 19012, seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_cuda.cu)
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_add_sources_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_add_sources_cuda.cu 2011-10-06 03:31:24 UTC (rev 19027)
@@ -0,0 +1,773 @@
+#include <stdio.h>
+#include <cuda.h>
+#include <cublas.h>
+#include <mpi.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+// #include "epik_user.h"
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// elastic domain sources
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+// crashes if the CMTSOLUTION does not match the mesh properly
+__global__ void compute_add_sources_kernel(float* accel, int* ibool, int* ispec_is_inner, int phase_is_inner, float* sourcearrays, double* stf_pre_compute,int myrank, int* islice_selected_source, int* ispec_selected_source, int* ispec_is_elastic, int NSOURCES,float* d_debug) {
+ int i = threadIdx.x;
+ int j = threadIdx.y;
+ int k = threadIdx.z;
+
+ int isource = blockIdx.x + gridDim.x*blockIdx.y; // bx
+ int ispec;
+ int iglob;
+ double stf;
+
+ if(isource < NSOURCES) { // when NSOURCES > 65535, but mod(nspec_top,2) > 0, we end up with an extra block.
+
+ if(myrank == islice_selected_source[isource]) {
+
+ ispec = ispec_selected_source[isource]-1;
+
+ if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_elastic[ispec] == 1) {
+
+ stf = stf_pre_compute[isource];
+ iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
+ atomicAdd(&accel[iglob*3],
+ sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 0, i,j,k)]*stf);
+ atomicAdd(&accel[iglob*3+1],
+ sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 1, i,j,k)]*stf);
+ // if((iglob*3+2 == 304598)) {
+ // atomicAdd(&d_debug[0],1.0f);
+ // d_debug[1] = accel[iglob*3+2];
+ // d_debug[2] = sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 2, i,j,k)];
+ // d_debug[3] = stf;
+ // }
+ // d_debug[4] = 42.0f;
+ atomicAdd(&accel[iglob*3+2],
+ sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 2, i,j,k)]*stf);
+ }
+ }
+ }
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(add_sourcearrays_adjoint_cuda,
+ ADD_SOURCEARRAYS_ADJOINT_CUDA)(long* Mesh_pointer,
+ int* USE_FORCE_POINT_SOURCE,
+ double* h_stf_pre_compute,int* NSOURCES,
+ int* phase_is_inner,int* myrank) {
+TRACE("add_sourcearrays_adjoint_cuda");
+// EPIK_TRACER("add_sourcearrays_adjoint_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+ if(*USE_FORCE_POINT_SOURCE) {
+ printf("USE FORCE POINT SOURCE not implemented for GPU_MODE");
+ MPI_Abort(MPI_COMM_WORLD, 1);
+ }
+
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,(*NSOURCES)*sizeof(double),cudaMemcpyHostToDevice),18);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("noise_read_add_surface_movie_cuda_kernel");
+#endif
+
+ int num_blocks_x = *NSOURCES;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(5,5,5);
+
+ float* d_debug;
+ // float* h_debug = (float*)calloc(128,sizeof(float));
+ // cudaMalloc((void**)&d_debug,128*sizeof(float));
+ // cudaMemcpy(d_debug,h_debug,128*sizeof(float),cudaMemcpyHostToDevice);
+
+ compute_add_sources_kernel<<<grid,threads>>>(mp->d_b_accel,mp->d_ibool,
+ mp->d_ispec_is_inner, *phase_is_inner,
+ mp->d_sourcearrays,
+ mp->d_stf_pre_compute,
+ *myrank,
+ mp->d_islice_selected_source,mp->d_ispec_selected_source,
+ mp->d_ispec_is_elastic,
+ *NSOURCES,
+ d_debug);
+
+ // cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
+ // for(int i=0;i<10;i++) {
+ // printf("debug[%d] = %e \n",i,h_debug[i]);
+ // }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("add_sourcearrays_adjoint_cuda");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(compute_add_sources_elastic_cuda,
+ COMPUTE_ADD_SOURCES_ELASTIC_CUDA)(long* Mesh_pointer_f,
+ int* NSPEC_ABf, int* NGLOB_ABf,
+ int* phase_is_innerf,int* NSOURCESf,
+ int* itf, float* dtf, float* t0f,
+ int* SIMULATION_TYPEf,int* NSTEPf,
+ int* NOISE_TOMOGRAPHYf,
+ int* USE_FORCE_POINT_SOURCEf,
+ double* h_stf_pre_compute, int* myrankf) {
+
+TRACE("compute_add_sources_elastic_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+ //int NSPEC_AB = *NSPEC_ABf;
+ //int NGLOB_AB = *NGLOB_ABf;
+ int phase_is_inner = *phase_is_innerf;
+ //int it = *itf;
+ //float dt = *dtf;
+ //float t0 = *t0f;
+ //int SIMULATION_TYPE = *SIMULATION_TYPEf;
+ //int NSTEP = *NSTEPf;
+ //int NOISE_TOMOGRAPHY = *NOISE_TOMOGRAPHYf;
+ int NSOURCES = *NSOURCESf;
+ //int USE_FORCE_POINT_SOURCE = *USE_FORCE_POINT_SOURCEf;
+ int myrank = *myrankf;
+
+ float* d_debug;
+ int num_blocks_x = NSOURCES;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ //double* d_stf_pre_compute;
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,NSOURCES*sizeof(double),cudaMemcpyHostToDevice),18);
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(5,5,5);
+ // (float* accel, int* ibool, int* ispec_is_inner, int phase_is_inner, float* sourcearrays, double* stf_pre_compute,int myrank, int* islice_selected_source, int* ispec_selected_source, int* ispec_is_elastic, int NSOURCES)
+
+
+
+ compute_add_sources_kernel<<<grid,threads>>>(mp->d_accel,mp->d_ibool, mp->d_ispec_is_inner, phase_is_inner, mp->d_sourcearrays, mp->d_stf_pre_compute,myrank, mp->d_islice_selected_source,mp->d_ispec_selected_source,mp->d_ispec_is_elastic, NSOURCES,d_debug);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("compute_add_sources_kernel");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void add_source_master_rec_noise_cuda_kernel(int* ibool, int* ispec_selected_rec, int irec_master_noise, real* accel, real* noise_sourcearray, int it) {
+ int tx = threadIdx.x;
+ int iglob = ibool[tx + 125*(ispec_selected_rec[irec_master_noise-1]-1)]-1;
+
+ // not sure if we need atomic operations but just in case...
+ // accel[3*iglob] += noise_sourcearray[3*tx + 3*125*it];
+ // accel[1+3*iglob] += noise_sourcearray[1+3*tx + 3*125*it];
+ // accel[2+3*iglob] += noise_sourcearray[2+3*tx + 3*125*it];
+
+ atomicAdd(&accel[iglob*3],noise_sourcearray[3*tx + 3*125*it]);
+ atomicAdd(&accel[iglob*3+1],noise_sourcearray[1+3*tx + 3*125*it]);
+ atomicAdd(&accel[iglob*3+2],noise_sourcearray[2+3*tx + 3*125*it]);
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(add_source_master_rec_noise_cuda,
+ ADD_SOURCE_MASTER_REC_NOISE_CUDA)(long* Mesh_pointer_f,
+ int* myrank_f,
+ int* it_f,
+ int* irec_master_noise_f,
+ int* islice_selected_rec) {
+
+TRACE("add_source_master_rec_noise_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ int it = *it_f-1; // -1 for Fortran -> C indexing differences
+ int irec_master_noise = *irec_master_noise_f;
+ int myrank = *myrank_f;
+ dim3 grid(1,1,1);
+ dim3 threads(125,1,1);
+ if(myrank == islice_selected_rec[irec_master_noise-1]) {
+ add_source_master_rec_noise_cuda_kernel<<<grid,threads>>>(mp->d_ibool, mp->d_ispec_selected_rec,
+ irec_master_noise, mp->d_accel,
+ mp->d_noise_sourcearray, it);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("add_source_master_rec_noise_cuda_kernel");
+#endif
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void add_sources_SIM_TYPE_2_OR_3_kernel(float* accel, int nrec,
+ float* adj_sourcearrays,
+ int* ibool,
+ int* ispec_is_inner,
+ int* ispec_selected_rec,
+ int phase_is_inner,
+ int* islice_selected_rec,
+ int* pre_computed_irec,
+ int nadj_rec_local,
+ int NTSTEP_BETWEEN_ADJSRC,
+ int myrank,
+ int* debugi,
+ float* debugf) {
+ int irec_local = blockIdx.x + gridDim.x*blockIdx.y;
+ if(irec_local<nadj_rec_local) { // when nrec > 65535, but mod(nspec_top,2) > 0, we end up with an extra block.
+
+ int irec = pre_computed_irec[irec_local];
+
+ int ispec_selected = ispec_selected_rec[irec]-1;
+ if(ispec_is_inner[ispec_selected] == phase_is_inner) {
+ int i = threadIdx.x;
+ int j = threadIdx.y;
+ int k = threadIdx.z;
+ int iglob = ibool[i+5*(j+5*(k+5*ispec_selected))]-1;
+
+ // atomic operations are absolutely necessary for correctness!
+ atomicAdd(&(accel[0+3*iglob]),adj_sourcearrays[INDEX5(5,5,5,3,
+ i,j,k,
+ 0,
+ irec_local)]);
+
+ atomicAdd(&accel[1+3*iglob], adj_sourcearrays[INDEX5(5,5,5,3,
+ i,j,k,
+ 1,
+ irec_local)]);
+
+ atomicAdd(&accel[2+3*iglob],adj_sourcearrays[INDEX5(5,5,5,3,
+ i,j,k,
+ 2,
+ irec_local)]);
+ }
+
+ }
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(add_sources_sim_type_2_or_3,
+ ADD_SOURCES_SIM_TYPE_2_OR_3)(long* Mesh_pointer,
+ float* h_adj_sourcearrays,
+ int* size_adj_sourcearrays, int* ispec_is_inner,
+ int* phase_is_inner, int* ispec_selected_rec,
+ int* ibool,
+ int* myrank, int* nrec, int* time_index,
+ int* h_islice_selected_rec,int* nadj_rec_local,
+ int* NTSTEP_BETWEEN_READ_ADJSRC) {
+
+TRACE("add_sources_sim_type_2_or_3");
+
+ if(*nadj_rec_local > 0) {
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+ int rank;
+ MPI_Comm_rank(MPI_COMM_WORLD,&rank);
+
+ // make sure grid dimension is less than 65535 in x dimension
+ int num_blocks_x = *nadj_rec_local;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+ dim3 grid(num_blocks_x,num_blocks_y,1);
+ dim3 threads(5,5,5);
+
+ float* d_adj_sourcearrays;
+ print_CUDA_error_if_any(cudaMalloc((void**)&d_adj_sourcearrays,
+ (*nadj_rec_local)*3*125*sizeof(float)),1);
+ float* h_adj_sourcearrays_slice = (float*)malloc((*nadj_rec_local)*3*125*sizeof(float));
+
+ int* h_pre_computed_irec = new int[*nadj_rec_local];
+ int* d_pre_computed_irec;
+ cudaMalloc((void**)&d_pre_computed_irec,(*nadj_rec_local)*sizeof(int));
+
+ // build slice of adj_sourcearrays because full array is *very* large.
+ int irec_local = 0;
+ for(int irec = 0;irec<*nrec;irec++) {
+ if(*myrank == h_islice_selected_rec[irec]) {
+ irec_local++;
+ h_pre_computed_irec[irec_local-1] = irec;
+ if(ispec_is_inner[ispec_selected_rec[irec]-1] == *phase_is_inner) {
+ for(int k=0;k<5;k++) {
+ for(int j=0;j<5;j++) {
+ for(int i=0;i<5;i++) {
+
+ h_adj_sourcearrays_slice[INDEX5(5,5,5,3,
+ i,j,k,0,
+ irec_local-1)]
+ = h_adj_sourcearrays[INDEX6(*nadj_rec_local,
+ *NTSTEP_BETWEEN_READ_ADJSRC,
+ 3,5,5,
+ irec_local-1,
+ *time_index-1,
+ 0,i,j,k)];
+
+ h_adj_sourcearrays_slice[INDEX5(5,5,5,3,
+ i,j,k,1,
+ irec_local-1)]
+ = h_adj_sourcearrays[INDEX6(*nadj_rec_local,
+ *NTSTEP_BETWEEN_READ_ADJSRC,
+ 3,5,5,
+ irec_local-1,
+ *time_index-1,
+ 1,i,j,k)];
+
+ h_adj_sourcearrays_slice[INDEX5(5,5,5,3,
+ i,j,k,2,
+ irec_local-1)]
+ = h_adj_sourcearrays[INDEX6(*nadj_rec_local,
+ *NTSTEP_BETWEEN_READ_ADJSRC,
+ 3,5,5,
+ irec_local-1,
+ *time_index-1,
+ 2,i,j,k)];
+
+
+ }
+ }
+ }
+ }
+ }
+ }
+ // printf("irec_local vs. *nadj_rec_local -> %d vs. %d\n",irec_local,*nadj_rec_local);
+ // for(int ispec=0;ispec<(*nadj_rec_local);ispec++) {
+ // for(int i=0;i<5;i++)
+ // for(int j=0;j<5;j++)
+ // for(int k=0;k<5;k++) {
+ // h_adj_sourcearrays_slice[INDEX5(5,5,5,3,i,j,k,0,ispec)] =
+ // h_adj_sourcearrays[INDEX6(*nadj_rec_local,*NTSTEP_BETWEEN_READ_ADJSRC,3,5,5,
+ // ispec,
+ // *time_index-1,
+ // 0,
+ // i,j,k)];
+ // h_adj_sourcearrays_slice[INDEX5(5,5,5,3,i,j,k,1,ispec)] =
+ // h_adj_sourcearrays[INDEX6(*nadj_rec_local,*NTSTEP_BETWEEN_READ_ADJSRC,3,5,5,
+ // ispec,
+ // *time_index-1,
+ // 1,
+ // i,j,k)];
+ // h_adj_sourcearrays_slice[INDEX5(5,5,5,3,i,j,k,2,ispec)] =
+ // h_adj_sourcearrays[INDEX6(*nadj_rec_local,*NTSTEP_BETWEEN_ADJSRC,3,5,5,
+ // ispec,
+ // *time_index-1,
+ // 2,
+ // i,j,k)];
+ // }
+
+ // }
+
+ cudaMemcpy(d_adj_sourcearrays, h_adj_sourcearrays_slice,(*nadj_rec_local)*3*125*sizeof(float),
+ cudaMemcpyHostToDevice);
+
+
+ // the irec_local variable needs to be precomputed (as
+ // h_pre_comp..), because normally it is in the loop updating accel,
+ // and due to how it's incremented, it cannot be parallelized
+
+ // int irec_local=0;
+ // for(int irec=0;irec<*nrec;irec++) {
+ // if(*myrank == h_islice_selected_rec[irec]) {
+ // h_pre_computed_irec_local_index[irec] = irec_local;
+ // irec_local++;
+ // if(irec_local==1) {
+ // // printf("%d:first useful irec==%d\n",rank,irec);
+ // }
+ // }
+ // else h_pre_computed_irec_local_index[irec] = 0;
+ // }
+ cudaMemcpy(d_pre_computed_irec,h_pre_computed_irec,
+ (*nadj_rec_local)*sizeof(int),cudaMemcpyHostToDevice);
+ // pause_for_debugger(1);
+ int* d_debugi, *h_debugi;
+ float* d_debugf, *h_debugf;
+ h_debugi = (int*)calloc(num_blocks_x,sizeof(int));
+ cudaMalloc((void**)&d_debugi,num_blocks_x*sizeof(int));
+ cudaMemcpy(d_debugi,h_debugi,num_blocks_x*sizeof(int),cudaMemcpyHostToDevice);
+ h_debugf = (float*)calloc(num_blocks_x,sizeof(float));
+ cudaMalloc((void**)&d_debugf,num_blocks_x*sizeof(float));
+ cudaMemcpy(d_debugf,h_debugf,num_blocks_x*sizeof(float),cudaMemcpyHostToDevice);
+
+ add_sources_SIM_TYPE_2_OR_3_kernel<<<grid,threads>>>(mp->d_accel, *nrec,
+ d_adj_sourcearrays, mp->d_ibool,
+ mp->d_ispec_is_inner,
+ mp->d_ispec_selected_rec,
+ *phase_is_inner,
+ mp->d_islice_selected_rec,
+ d_pre_computed_irec,
+ *nadj_rec_local,
+ *NTSTEP_BETWEEN_READ_ADJSRC,
+ *myrank,
+ d_debugi,d_debugf);
+
+ cudaMemcpy(h_debugi,d_debugi,num_blocks_x*sizeof(int),cudaMemcpyDeviceToHost);
+ cudaMemcpy(h_debugf,d_debugf,num_blocks_x*sizeof(float),cudaMemcpyDeviceToHost);
+
+ // printf("%d: pre_com0:%d\n",rank,h_pre_computed_irec_local_index[0]);
+ // printf("%d: pre_com1:%d\n",rank,h_pre_computed_irec_local_index[1]);
+ // printf("%d: pre_com2:%d\n",rank,h_pre_computed_irec_local_index[2]);
+ // for(int i=156;i<(156+30);i++) {
+ // if(rank==0) printf("%d:debug[%d] = i/f = %d / %e\n",rank,i,h_debugi[i],h_debugf[i]);
+ // }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ // MPI_Barrier(MPI_COMM_WORLD);
+ exit_on_cuda_error("add_sources_SIM_TYPE_2_OR_3_kernel");
+
+ // printf("Proc %d exiting with successful kernel\n",rank);
+ // exit(1);
+#endif
+ delete h_pre_computed_irec;
+ cudaFree(d_adj_sourcearrays);
+ cudaFree(d_pre_computed_irec);
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// acoustic sources
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void compute_add_sources_acoustic_kernel(float* potential_dot_dot_acoustic,
+ int* ibool,
+ int* ispec_is_inner,
+ int phase_is_inner,
+ float* sourcearrays,
+ double* stf_pre_compute,
+ int myrank,
+ int* islice_selected_source,
+ int* ispec_selected_source,
+ int* ispec_is_acoustic,
+ float* kappastore,
+ int NSOURCES) {
+ int i = threadIdx.x;
+ int j = threadIdx.y;
+ int k = threadIdx.z;
+
+ int isource = blockIdx.x + gridDim.x*blockIdx.y; // bx
+
+ int ispec;
+ int iglob;
+ double stf;
+ float kappal;
+
+ if( isource < NSOURCES ){
+
+ //if(myrank == 0 && i== 0 && j == 0 && k == 0) printf("source isource = %i \n",isource);
+
+ if(myrank == islice_selected_source[isource]) {
+
+ ispec = ispec_selected_source[isource]-1;
+
+ if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_acoustic[ispec] == 1) {
+
+ stf = stf_pre_compute[isource];
+ iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
+ kappal = kappastore[INDEX4(5,5,5,i,j,k,ispec)];
+
+ //printf("source ispec = %i %i %e %e \n",ispec,iglob,stf,kappal);
+ //printf("source arr = %e %i %i %i %i %i\n", -sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 0, i,j,k)]*stf/kappal,i,j,k,iglob,ispec);
+
+ atomicAdd(&potential_dot_dot_acoustic[iglob],
+ -sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 0, i,j,k)]*stf/kappal);
+
+ // potential_dot_dot_acoustic[iglob] +=
+ // -sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 0, i,j,k)]*stf/kappal;
+
+ //printf("potential = %e %i %i %i %i %i\n", potential_dot_dot_acoustic[iglob],i,j,k,iglob,ispec);
+
+
+ }
+ }
+ }
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(compute_add_sources_acoustic_cuda,
+ COMPUTE_ADD_SOURCES_ACOUSTIC_CUDA)(long* Mesh_pointer_f,
+ int* phase_is_innerf,
+ int* NSOURCESf,
+ int* SIMULATION_TYPEf,
+ int* USE_FORCE_POINT_SOURCEf,
+ double* h_stf_pre_compute,
+ int* myrankf) {
+
+TRACE("compute_add_sources_acoustic_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+ int phase_is_inner = *phase_is_innerf;
+ //int SIMULATION_TYPE = *SIMULATION_TYPEf;
+ int NSOURCES = *NSOURCESf;
+ //int USE_FORCE_POINT_SOURCE = *USE_FORCE_POINT_SOURCEf;
+ int myrank = *myrankf;
+
+ int num_blocks_x = NSOURCES;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ // copies pre-computed source time factors onto GPU
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,NSOURCES*sizeof(double),cudaMemcpyHostToDevice),18);
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(5,5,5);
+
+ compute_add_sources_acoustic_kernel<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
+ mp->d_ibool,
+ mp->d_ispec_is_inner,
+ phase_is_inner,
+ mp->d_sourcearrays,
+ mp->d_stf_pre_compute,
+ myrank,
+ mp->d_islice_selected_source,
+ mp->d_ispec_selected_source,
+ mp->d_ispec_is_acoustic,
+ mp->d_kappastore,
+ NSOURCES);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("compute_add_sources_acoustic_cuda");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(compute_add_sources_acoustic_sim3_cuda,
+ COMPUTE_ADD_SOURCES_ACOUSTIC_SIM3_CUDA)(long* Mesh_pointer_f,
+ int* phase_is_innerf,
+ int* NSOURCESf,
+ int* SIMULATION_TYPEf,
+ int* USE_FORCE_POINT_SOURCEf,
+ double* h_stf_pre_compute,
+ int* myrankf) {
+
+TRACE("compute_add_sources_acoustic_sim3_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+ int phase_is_inner = *phase_is_innerf;
+ //int SIMULATION_TYPE = *SIMULATION_TYPEf;
+ int NSOURCES = *NSOURCESf;
+ //int USE_FORCE_POINT_SOURCE = *USE_FORCE_POINT_SOURCEf;
+ int myrank = *myrankf;
+
+ int num_blocks_x = NSOURCES;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ // copies source time factors onto GPU
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,NSOURCES*sizeof(double),cudaMemcpyHostToDevice),18);
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(5,5,5);
+
+ compute_add_sources_acoustic_kernel<<<grid,threads>>>(mp->d_b_potential_dot_dot_acoustic,
+ mp->d_ibool,
+ mp->d_ispec_is_inner,
+ phase_is_inner,
+ mp->d_sourcearrays,
+ mp->d_stf_pre_compute,
+ myrank,
+ mp->d_islice_selected_source,
+ mp->d_ispec_selected_source,
+ mp->d_ispec_is_acoustic,
+ mp->d_kappastore,
+ NSOURCES);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("compute_add_sources_acoustic_sim3_cuda");
+#endif
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// acoustic adjoint sources
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void add_sources_acoustic_SIM_TYPE_2_OR_3_kernel(float* potential_dot_dot_acoustic,
+ int nrec,
+ int pre_computed_index,
+ float* adj_sourcearrays,
+ int* ibool,
+ int* ispec_is_inner,
+ int* ispec_is_acoustic,
+ float* kappastore,
+ int* ispec_selected_rec,
+ int phase_is_inner,
+ int* islice_selected_rec,
+ int* pre_computed_irec_local_index,
+ int nadj_rec_local,
+ int NTSTEP_BETWEEN_ADJSRC,
+ int myrank) {
+
+ int irec = blockIdx.x + gridDim.x*blockIdx.y;
+
+ //float kappal;
+ int i,j,k,iglob,ispec;
+
+ // because of grid shape, irec can be too big
+ if(irec<nrec) {
+
+ // adds source only if this proc carries the sources
+ if( myrank == islice_selected_rec[irec] ){
+
+ // adds acoustic source
+ ispec = ispec_selected_rec[irec]-1;
+ if( ispec_is_acoustic[ispec] == 1 ){
+
+ // checks if element is in phase_is_inner run
+ if(ispec_is_inner[ispec] == phase_is_inner) {
+ i = threadIdx.x;
+ j = threadIdx.y;
+ k = threadIdx.z;
+ iglob = ibool[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)]-1;
+
+ //kappal = kappastore[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)];
+
+ //potential_dot_dot_acoustic[iglob] += adj_sourcearrays[INDEX6(nadj_rec_local,NTSTEP_BETWEEN_ADJSRC,3,5,5,
+ // pre_computed_irec_local_index[irec],
+ // pre_computed_index,
+ // 0,
+ // i,j,k)]/kappal;
+
+ // beware, for acoustic medium, a pressure source would be taking the negative
+ // and divide by Kappa of the fluid;
+ // this would have to be done when constructing the adjoint source.
+ //
+ // note: we take the first component of the adj_sourcearrays
+ // the idea is to have e.g. a pressure source, where all 3 components would be the same
+
+ atomicAdd(&potential_dot_dot_acoustic[iglob],
+ +adj_sourcearrays[INDEX6(nadj_rec_local,NTSTEP_BETWEEN_ADJSRC,3,5,5,
+ pre_computed_irec_local_index[irec],pre_computed_index-1,
+ 0,i,j,k)] // / kappal
+ );
+ }
+ }
+ }
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+extern "C"
+void FC_FUNC_(add_sources_acoustic_sim_type_2_or_3_cuda,
+ ADD_SOURCES_ACOUSTIC_SIM_TYPE_2_OR_3_CUDA)(long* Mesh_pointer,
+ float* h_adj_sourcearrays,
+ int* size_adj_sourcearrays,
+ int* phase_is_inner,
+ int* myrank,
+ int* nrec,
+ int* pre_computed_index,
+ int* h_islice_selected_rec,
+ int* nadj_rec_local,
+ int* NTSTEP_BETWEEN_ADJSRC) {
+
+TRACE("add_sources_acoustic_sim_type_2_or_3_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+
+ // make sure grid dimension is less than 65535 in x dimension
+ int num_blocks_x = *nrec;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+ dim3 grid(num_blocks_x,num_blocks_y,1);
+ dim3 threads(5,5,5);
+
+ // copies source arrays
+ // daniel: todo workaround -- adj_sourcearrays can be very big, but here only at
+ // specific time it (pre_computed_irec_local_index) is actually needed...
+ float* d_adj_sourcearrays;
+ print_CUDA_error_if_any(cudaMalloc((void**)&d_adj_sourcearrays,
+ (*size_adj_sourcearrays)*sizeof(float)),731);
+ print_CUDA_error_if_any(cudaMemcpy(d_adj_sourcearrays, h_adj_sourcearrays,
+ (*size_adj_sourcearrays)*sizeof(float),cudaMemcpyHostToDevice),732);
+
+ //int* h_pre_computed_irec_local_index = new int[*nadj_rec_local];
+ int* h_pre_computed_irec_local_index = (int*) calloc(*nrec,sizeof(int));
+
+ int* d_pre_computed_irec_local_index;
+ print_CUDA_error_if_any(cudaMalloc((void**)&d_pre_computed_irec_local_index,
+ (*nrec)*sizeof(int)),741);
+
+ // the irec_local variable needs to be precomputed (as
+ // h_pre_comp..), because normally it is in the loop updating accel,
+ // and due to how it's incremented, it cannot be parallized
+ int irec_local=0;
+ for(int irec=0;irec<*nrec;irec++) {
+ if(*myrank == h_islice_selected_rec[irec]) {
+ h_pre_computed_irec_local_index[irec] = irec_local;
+ irec_local++;
+ }
+ else h_pre_computed_irec_local_index[irec] = 0;
+ }
+
+ //daniel
+ //printf("irec local: rank=%d irec_local=%d nadj_rec_local=%d nrec=%d \n",*myrank,irec_local,*nadj_rec_local,*nrec);
+
+ print_CUDA_error_if_any(cudaMemcpy(d_pre_computed_irec_local_index,h_pre_computed_irec_local_index,
+ (*nrec)*sizeof(int),cudaMemcpyHostToDevice),742);
+
+ add_sources_acoustic_SIM_TYPE_2_OR_3_kernel<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
+ *nrec,
+ *pre_computed_index,
+ d_adj_sourcearrays,
+ mp->d_ibool,
+ mp->d_ispec_is_inner,
+ mp->d_ispec_is_acoustic,
+ mp->d_kappastore,
+ mp->d_ispec_selected_rec,
+ *phase_is_inner,
+ mp->d_islice_selected_rec,
+ d_pre_computed_irec_local_index,
+ *nadj_rec_local,
+ *NTSTEP_BETWEEN_ADJSRC,
+ *myrank);
+
+ //delete h_pre_computed_irec_local_index;
+ free(h_pre_computed_irec_local_index);
+ cudaFree(d_adj_sourcearrays);
+ cudaFree(d_pre_computed_irec_local_index);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("add_sources_acoustic_SIM_TYPE_2_OR_3_kernel");
+#endif
+}
Added: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_acoustic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_acoustic_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_acoustic_cuda.cu 2011-10-06 03:31:24 UTC (rev 19027)
@@ -0,0 +1,853 @@
+/*
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+*/
+
+
+#include <stdio.h>
+#include <cuda.h>
+#include <cublas.h>
+#include <mpi.h>
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// prepares a device array with with all inter-element edge-nodes -- this
+// is followed by a memcpy and MPI operations
+__global__ void prepare_boundary_potential_on_device(float* d_potential_dot_dot_acoustic,
+ float* d_send_potential_dot_dot_buffer,
+ int num_interfaces_ext_mesh,
+ int max_nibool_interfaces_ext_mesh,
+ int* d_nibool_interfaces_ext_mesh,
+ int* d_ibool_interfaces_ext_mesh) {
+
+ int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
+ //int bx = blockIdx.y*gridDim.x+blockIdx.x;
+ //int tx = threadIdx.x;
+ int iinterface=0;
+
+ for( iinterface=0; iinterface < num_interfaces_ext_mesh; iinterface++) {
+ if(id<d_nibool_interfaces_ext_mesh[iinterface]) {
+ d_send_potential_dot_dot_buffer[(id + max_nibool_interfaces_ext_mesh*iinterface)] =
+ d_potential_dot_dot_acoustic[(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)];
+ }
+ }
+
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// prepares and transfers the inter-element edge-nodes to the host to be MPI'd
+extern "C"
+void FC_FUNC_(transfer_boundary_potential_from_device,
+ TRANSFER_BOUNDARY_POTENTIAL_FROM_DEVICE)(
+ int* size,
+ long* Mesh_pointer_f,
+ float* potential_dot_dot_acoustic,
+ float* send_potential_dot_dot_buffer,
+ int* num_interfaces_ext_mesh,
+ int* max_nibool_interfaces_ext_mesh,
+ int* nibool_interfaces_ext_mesh,
+ int* ibool_interfaces_ext_mesh,
+ int* FORWARD_OR_ADJOINT){
+
+TRACE("transfer_boundary_potential_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ int blocksize = 256;
+ int size_padded = ((int)ceil(((double)*max_nibool_interfaces_ext_mesh)/((double)blocksize)))*blocksize;
+ int num_blocks_x = size_padded/blocksize;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ if(*FORWARD_OR_ADJOINT == 1) {
+ prepare_boundary_potential_on_device<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
+ mp->d_send_potential_dot_dot_buffer,
+ *num_interfaces_ext_mesh,
+ *max_nibool_interfaces_ext_mesh,
+ mp->d_nibool_interfaces_ext_mesh,
+ mp->d_ibool_interfaces_ext_mesh);
+ }
+ else if(*FORWARD_OR_ADJOINT == 3) {
+ prepare_boundary_potential_on_device<<<grid,threads>>>(mp->d_b_potential_dot_dot_acoustic,
+ mp->d_send_potential_dot_dot_buffer,
+ *num_interfaces_ext_mesh,
+ *max_nibool_interfaces_ext_mesh,
+ mp->d_nibool_interfaces_ext_mesh,
+ mp->d_ibool_interfaces_ext_mesh);
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("prepare_boundary_kernel");
+#endif
+
+
+ cudaMemcpy(send_potential_dot_dot_buffer,mp->d_send_potential_dot_dot_buffer,
+ *max_nibool_interfaces_ext_mesh* *num_interfaces_ext_mesh*sizeof(real),cudaMemcpyDeviceToHost);
+
+ // finish timing of kernel+memcpy
+ // cudaEventRecord( stop, 0 );
+ // cudaEventSynchronize( stop );
+ // cudaEventElapsedTime( &time, start, stop );
+ // cudaEventDestroy( start );
+ // cudaEventDestroy( stop );
+ // printf("boundary xfer d->h Time: %f ms\n",time);
+
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+__global__ void assemble_boundary_potential_on_device(float* d_potential_dot_dot_acoustic,
+ float* d_send_potential_dot_dot_buffer,
+ int num_interfaces_ext_mesh,
+ int max_nibool_interfaces_ext_mesh,
+ int* d_nibool_interfaces_ext_mesh,
+ int* d_ibool_interfaces_ext_mesh) {
+
+ int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
+ //int bx = blockIdx.y*gridDim.x+blockIdx.x;
+ //int tx = threadIdx.x;
+ int iinterface=0;
+
+ for( iinterface=0; iinterface < num_interfaces_ext_mesh; iinterface++) {
+ if(id<d_nibool_interfaces_ext_mesh[iinterface]) {
+
+ // for testing atomic operations against not atomic operations (0.1ms vs. 0.04 ms)
+ // d_potential_dot_dot_acoustic[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)] +=
+ // d_send_potential_dot_dot_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)];
+
+ atomicAdd(&d_potential_dot_dot_acoustic[(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)],
+ d_send_potential_dot_dot_buffer[(id + max_nibool_interfaces_ext_mesh*iinterface)]);
+ }
+ }
+ // ! This step is done via previous function transfer_and_assemble...
+ // ! do iinterface = 1, num_interfaces_ext_mesh
+ // ! do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
+ // ! array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface)) = &
+ // ! array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface)) + buffer_recv_vector_ext_mesh(:,ipoin,iinterface)
+ // ! enddo
+ // ! enddo
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_and_assemble_potential_to_device,
+ TRANSFER_AND_ASSEMBLE_POTENTIAL_TO_DEVICE)(
+ long* Mesh_pointer,
+ real* potential_dot_dot_acoustic,
+ real* buffer_recv_scalar_ext_mesh,
+ int* num_interfaces_ext_mesh,
+ int* max_nibool_interfaces_ext_mesh,
+ int* nibool_interfaces_ext_mesh,
+ int* ibool_interfaces_ext_mesh,
+ int* FORWARD_OR_ADJOINT) {
+
+TRACE("transfer_and_assemble_potential_to_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+
+ cudaMemcpy(mp->d_send_potential_dot_dot_buffer, buffer_recv_scalar_ext_mesh,
+ *max_nibool_interfaces_ext_mesh* *num_interfaces_ext_mesh*sizeof(real), cudaMemcpyHostToDevice);
+
+ int blocksize = 256;
+ int size_padded = ((int)ceil(((double)*max_nibool_interfaces_ext_mesh)/((double)blocksize)))*blocksize;
+ int num_blocks_x = size_padded/blocksize;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+ //double start_time = get_time();
+ // cudaEvent_t start, stop;
+ // float time;
+ // cudaEventCreate(&start);
+ // cudaEventCreate(&stop);
+ // cudaEventRecord( start, 0 );
+ if(*FORWARD_OR_ADJOINT == 1) {
+ //assemble forward field
+ assemble_boundary_potential_on_device<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
+ mp->d_send_potential_dot_dot_buffer,
+ *num_interfaces_ext_mesh,
+ *max_nibool_interfaces_ext_mesh,
+ mp->d_nibool_interfaces_ext_mesh,
+ mp->d_ibool_interfaces_ext_mesh);
+ }
+ else if(*FORWARD_OR_ADJOINT == 3) {
+ //assemble reconstructed/backward field
+ assemble_boundary_potential_on_device<<<grid,threads>>>(mp->d_b_potential_dot_dot_acoustic,
+ mp->d_send_potential_dot_dot_buffer,
+ *num_interfaces_ext_mesh,
+ *max_nibool_interfaces_ext_mesh,
+ mp->d_nibool_interfaces_ext_mesh,
+ mp->d_ibool_interfaces_ext_mesh);
+ }
+
+ // cudaEventRecord( stop, 0 );
+ // cudaEventSynchronize( stop );
+ // cudaEventElapsedTime( &time, start, stop );
+ // cudaEventDestroy( start );
+ // cudaEventDestroy( stop );
+ // printf("Boundary Assemble Kernel Execution Time: %f ms\n",time);
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("transfer_and_assemble_potential_to_device");
+#endif
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+void Kernel_2_acoustic(int nb_blocks_to_compute, Mesh* mp, int d_iphase, int SIMULATION_TYPE);
+
+__global__ void Kernel_2_acoustic_impl(int nb_blocks_to_compute,int NGLOB, int* d_ibool,int* d_phase_ispec_inner_acoustic,
+ int num_phase_ispec_acoustic, int d_iphase,
+ float* d_potential_acoustic, float* d_potential_dot_dot_acoustic,
+ float* d_xix, float* d_xiy, float* d_xiz, float* d_etax, float* d_etay, float* d_etaz,
+ float* d_gammax, float* d_gammay, float* d_gammaz,
+ float* hprime_xx, float* hprimewgll_xx,
+ float* wgllwgll_xy,float* wgllwgll_xz,float* wgllwgll_yz,
+ float* d_rhostore);
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// main compute_forces_acoustic CUDA routine
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(compute_forces_acoustic_cuda,
+ COMPUTE_FORCES_ACOUSTIC_CUDA)(long* Mesh_pointer_f,
+ int* iphase,
+ int* nspec_outer_acoustic,
+ int* nspec_inner_acoustic,
+ int* SIMULATION_TYPE) {
+
+TRACE("compute_forces_acoustic_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
+
+ int num_elements;
+
+ if( *iphase == 1 )
+ num_elements = *nspec_outer_acoustic;
+ else
+ num_elements = *nspec_inner_acoustic;
+
+ //int myrank;
+ /* MPI_Comm_rank(MPI_COMM_WORLD,&myrank); */
+ /* if(myrank==0) { */
+
+ Kernel_2_acoustic(num_elements, mp, *iphase, *SIMULATION_TYPE);
+
+ cudaThreadSynchronize();
+/* MPI_Barrier(MPI_COMM_WORLD); */
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+/* KERNEL 2 */
+/* ----------------------------------------------------------------------------------------------- */
+
+void Kernel_2_acoustic(int nb_blocks_to_compute, Mesh* mp, int d_iphase, int SIMULATION_TYPE)
+{
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("before acoustic kernel Kernel 2");
+#endif
+
+ /* if the grid can handle the number of blocks, we let it be 1D */
+ /* grid_2_x = nb_elem_color; */
+ /* nb_elem_color is just how many blocks we are computing now */
+
+ int num_blocks_x = nb_blocks_to_compute;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ int threads_2 = 128;//BLOCK_SIZE_K2;
+ dim3 grid_2(num_blocks_x,num_blocks_y);
+
+
+ // Cuda timing
+ // cudaEvent_t start, stop;
+ // float time;
+ // cudaEventCreate(&start);
+ // cudaEventCreate(&stop);
+ // cudaEventRecord( start, 0 );
+
+ Kernel_2_acoustic_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,mp->NGLOB_AB, mp->d_ibool,
+ mp->d_phase_ispec_inner_acoustic, mp->num_phase_ispec_acoustic, d_iphase,
+ mp->d_potential_acoustic, mp->d_potential_dot_dot_acoustic,
+ mp->d_xix, mp->d_xiy, mp->d_xiz,
+ mp->d_etax, mp->d_etay, mp->d_etaz,
+ mp->d_gammax, mp->d_gammay, mp->d_gammaz,
+ mp->d_hprime_xx, mp->d_hprimewgll_xx,
+ mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
+ mp->d_rhostore);
+
+ if(SIMULATION_TYPE == 3) {
+ Kernel_2_acoustic_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,mp->NGLOB_AB, mp->d_ibool,
+ mp->d_phase_ispec_inner_acoustic,mp->num_phase_ispec_acoustic, d_iphase,
+ mp->d_b_potential_acoustic, mp->d_b_potential_dot_dot_acoustic,
+ mp->d_xix, mp->d_xiy, mp->d_xiz,
+ mp->d_etax, mp->d_etay, mp->d_etaz,
+ mp->d_gammax, mp->d_gammay, mp->d_gammaz,
+ mp->d_hprime_xx, mp->d_hprimewgll_xx,
+ mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
+ mp->d_rhostore);
+ }
+
+ // cudaEventRecord( stop, 0 );
+ // cudaEventSynchronize( stop );
+ // cudaEventElapsedTime( &time, start, stop );
+ // cudaEventDestroy( start );
+ // cudaEventDestroy( stop );
+ // printf("Kernel2 Execution Time: %f ms\n",time);
+
+ /* cudaThreadSynchronize(); */
+ /* TRACE("Kernel 2 finished"); */
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ //printf("Tried to start with %dx1 blocks\n",nb_blocks_to_compute);
+ exit_on_cuda_error("kernel Kernel_2");
+#endif
+}
+
+
+/* KERNEL 2 on device*/
+
+//typedef double reald;
+//typedef float reald;
+
+__global__ void Kernel_2_acoustic_impl(int nb_blocks_to_compute,int NGLOB, int* d_ibool,
+ int* d_phase_ispec_inner_acoustic,
+ int num_phase_ispec_acoustic, int d_iphase,
+ float* d_potential_acoustic, float* d_potential_dot_dot_acoustic,
+ float* d_xix, float* d_xiy, float* d_xiz, float* d_etax, float* d_etay, float* d_etaz,
+ float* d_gammax, float* d_gammay, float* d_gammaz,
+ float* hprime_xx, float* hprimewgll_xx,
+ float* wgllwgll_xy,float* wgllwgll_xz,float* wgllwgll_yz,
+ float* d_rhostore){
+
+ /* int bx = blockIdx.y*blockDim.x+blockIdx.x; //possible bug in original code*/
+ int bx = blockIdx.y*gridDim.x+blockIdx.x;
+ /* int bx = blockIdx.x; */
+ int tx = threadIdx.x;
+
+
+
+ //const int NGLLX = 5;
+ // const int NGLL2 = 25;
+ const int NGLL3 = 125;
+ const int NGLL3_ALIGN = 128;
+
+ int K = (tx/NGLL2);
+ int J = ((tx-K*NGLL2)/NGLLX);
+ int I = (tx-K*NGLL2-J*NGLLX);
+
+ int active,offset,offset1,offset2,offset3;
+ int iglob = 0;
+ int working_element;
+ reald temp1l,temp2l,temp3l;
+ reald xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl;
+ reald dpotentialdxl,dpotentialdyl,dpotentialdzl;
+ reald fac1,fac2,fac3,rho_invl;
+
+#ifndef MANUALLY_UNROLLED_LOOPS
+ int l;
+ float hp1,hp2,hp3;
+#endif
+
+ __shared__ reald s_dummy_loc[NGLL3];
+
+ __shared__ reald s_temp1[NGLL3];
+ __shared__ reald s_temp2[NGLL3];
+ __shared__ reald s_temp3[NGLL3];
+
+// use only NGLL^3 = 125 active threads, plus 3 inactive/ghost threads,
+// because we used memory padding from NGLL^3 = 125 to 128 to get coalescent memory accesses
+ active = (tx < NGLL3 && bx < nb_blocks_to_compute) ? 1:0;
+
+// copy from global memory to shared memory
+// each thread writes one of the NGLL^3 = 125 data points
+ if (active) {
+ // iphase-1 and working_element-1 for Fortran->C array conventions
+ working_element = d_phase_ispec_inner_acoustic[bx + num_phase_ispec_acoustic*(d_iphase-1)]-1;
+ // iglob = d_ibool[working_element*NGLL3_ALIGN + tx]-1;
+ iglob = d_ibool[working_element*125 + tx]-1;
+
+#ifdef USE_TEXTURES
+ s_dummy_loc[tx] = tex1Dfetch(tex_potential_acoustic, iglob);
+#else
+ // changing iglob indexing to match fortran row changes fast style
+ s_dummy_loc[tx] = d_potential_acoustic[iglob];
+#endif
+ }
+
+// synchronize all the threads (one thread for each of the NGLL grid points of the
+// current spectral element) because we need the whole element to be ready in order
+// to be able to compute the matrix products along cut planes of the 3D element below
+ __syncthreads();
+
+#ifndef MAKE_KERNEL2_BECOME_STUPID_FOR_TESTS
+
+ if (active) {
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+// if(iglob == 0 )printf("kernel 2: iglob %i hprime_xx %f %f %f \n",iglob,hprime_xx[0],hprime_xx[1],hprime_xx[2]);
+#endif
+
+
+#ifndef MANUALLY_UNROLLED_LOOPS
+
+ temp1l = 0.f;
+ temp2l = 0.f;
+ temp3l = 0.f;
+
+
+ for (l=0;l<NGLLX;l++) {
+ hp1 = hprime_xx[l*NGLLX+I];
+ offset1 = K*NGLL2+J*NGLLX+l;
+ temp1l += s_dummy_loc[offset1]*hp1;
+
+ // daniel: assumes that hprime_xx = hprime_yy = hprime_zz
+ hp2 = hprime_xx[l*NGLLX+J];
+ offset2 = K*NGLL2+l*NGLLX+I;
+ temp2l += s_dummy_loc[offset2]*hp2;
+
+ hp3 = hprime_xx[l*NGLLX+K];
+ offset3 = l*NGLL2+J*NGLLX+I;
+ temp3l += s_dummy_loc[offset3]*hp3;
+ }
+#else
+
+ temp1l = s_dummy_loc[K*NGLL2+J*NGLLX]*hprime_xx[I]
+ + s_dummy_loc[K*NGLL2+J*NGLLX+1]*hprime_xx[NGLLX+I]
+ + s_dummy_loc[K*NGLL2+J*NGLLX+2]*hprime_xx[2*NGLLX+I]
+ + s_dummy_loc[K*NGLL2+J*NGLLX+3]*hprime_xx[3*NGLLX+I]
+ + s_dummy_loc[K*NGLL2+J*NGLLX+4]*hprime_xx[4*NGLLX+I];
+
+ temp2l = s_dummy_loc[K*NGLL2+I]*hprime_xx[J]
+ + s_dummy_loc[K*NGLL2+NGLLX+I]*hprime_xx[NGLLX+J]
+ + s_dummy_loc[K*NGLL2+2*NGLLX+I]*hprime_xx[2*NGLLX+J]
+ + s_dummy_loc[K*NGLL2+3*NGLLX+I]*hprime_xx[3*NGLLX+J]
+ + s_dummy_loc[K*NGLL2+4*NGLLX+I]*hprime_xx[4*NGLLX+J];
+
+ temp3l = s_dummy_loc[J*NGLLX+I]*hprime_xx[K]
+ + s_dummy_loc[NGLL2+J*NGLLX+I]*hprime_xx[NGLLX+K]
+ + s_dummy_loc[2*NGLL2+J*NGLLX+I]*hprime_xx[2*NGLLX+K]
+ + s_dummy_loc[3*NGLL2+J*NGLLX+I]*hprime_xx[3*NGLLX+K]
+ + s_dummy_loc[4*NGLL2+J*NGLLX+I]*hprime_xx[4*NGLLX+K];
+
+#endif
+
+ // compute derivatives of ux, uy and uz with respect to x, y and z
+ offset = working_element*NGLL3_ALIGN + tx;
+
+ xixl = d_xix[offset];
+ xiyl = d_xiy[offset];
+ xizl = d_xiz[offset];
+ etaxl = d_etax[offset];
+ etayl = d_etay[offset];
+ etazl = d_etaz[offset];
+ gammaxl = d_gammax[offset];
+ gammayl = d_gammay[offset];
+ gammazl = d_gammaz[offset];
+
+ jacobianl = 1.f / (xixl*(etayl*gammazl-etazl*gammayl)
+ -xiyl*(etaxl*gammazl-etazl*gammaxl)
+ +xizl*(etaxl*gammayl-etayl*gammaxl));
+
+ // derivatives of potential
+ dpotentialdxl = xixl*temp1l + etaxl*temp2l + gammaxl*temp3l;
+ dpotentialdyl = xiyl*temp1l + etayl*temp2l + gammayl*temp3l;
+ dpotentialdzl = xizl*temp1l + etazl*temp2l + gammazl*temp3l;
+
+ // density (reciproc)
+ rho_invl = 1.f / d_rhostore[offset];
+
+ // form the dot product with the test vector
+ s_temp1[tx] = jacobianl * rho_invl * (dpotentialdxl*xixl + dpotentialdyl*xiyl + dpotentialdzl*xizl);
+ s_temp2[tx] = jacobianl * rho_invl * (dpotentialdxl*etaxl + dpotentialdyl*etayl + dpotentialdzl*etazl);
+ s_temp3[tx] = jacobianl * rho_invl * (dpotentialdxl*gammaxl + dpotentialdyl*gammayl + dpotentialdzl*gammazl);
+ }
+
+// synchronize all the threads (one thread for each of the NGLL grid points of the
+// current spectral element) because we need the whole element to be ready in order
+// to be able to compute the matrix products along cut planes of the 3D element below
+ __syncthreads();
+
+ if (active) {
+
+#ifndef MANUALLY_UNROLLED_LOOPS
+
+ temp1l = 0.f;
+ temp2l = 0.f;
+ temp3l = 0.f;
+
+ for (l=0;l<NGLLX;l++) {
+ fac1 = hprimewgll_xx[I*NGLLX+l];
+ offset1 = K*NGLL2+J*NGLLX+l;
+ temp1l += s_temp1[offset1]*fac1;
+
+ //daniel: assumes hprimewgll_xx = hprimewgll_yy = hprimewgll_zz
+ fac2 = hprimewgll_xx[J*NGLLX+l];
+ offset2 = K*NGLL2+l*NGLLX+I;
+ temp2l += s_temp2[offset2]*fac2;
+
+ fac3 = hprimewgll_xx[K*NGLLX+l];
+ offset3 = l*NGLL2+J*NGLLX+I;
+ temp3l += s_temp3[offset3]*fac3;
+ }
+#else
+
+ temp1l = s_temp1[K*NGLL2+J*NGLLX]*hprimewgll_xx[I*NGLLX]
+ + s_temp1[K*NGLL2+J*NGLLX+1]*hprimewgll_xx[I*NGLLX+1]
+ + s_temp1[K*NGLL2+J*NGLLX+2]*hprimewgll_xx[I*NGLLX+2]
+ + s_temp1[K*NGLL2+J*NGLLX+3]*hprimewgll_xx[I*NGLLX+3]
+ + s_temp1[K*NGLL2+J*NGLLX+4]*hprimewgll_xx[I*NGLLX+4];
+
+
+ temp2l = s_temp2[K*NGLL2+I]*hprimewgll_xx[J*NGLLX]
+ + s_temp2[K*NGLL2+NGLLX+I]*hprimewgll_xx[J*NGLLX+1]
+ + s_temp2[K*NGLL2+2*NGLLX+I]*hprimewgll_xx[J*NGLLX+2]
+ + s_temp2[K*NGLL2+3*NGLLX+I]*hprimewgll_xx[J*NGLLX+3]
+ + s_temp2[K*NGLL2+4*NGLLX+I]*hprimewgll_xx[J*NGLLX+4];
+
+
+ temp3l = s_temp3[J*NGLLX+I]*hprimewgll_xx[K*NGLLX]
+ + s_temp3[NGLL2+J*NGLLX+I]*hprimewgll_xx[K*NGLLX+1]
+ + s_temp3[2*NGLL2+J*NGLLX+I]*hprimewgll_xx[K*NGLLX+2]
+ + s_temp3[3*NGLL2+J*NGLLX+I]*hprimewgll_xx[K*NGLLX+3]
+ + s_temp3[4*NGLL2+J*NGLLX+I]*hprimewgll_xx[K*NGLLX+4];
+
+
+#endif
+
+ fac1 = wgllwgll_yz[K*NGLLX+J];
+ fac2 = wgllwgll_xz[K*NGLLX+I];
+ fac3 = wgllwgll_xy[J*NGLLX+I];
+
+#ifdef USE_TEXTURES
+ d_potential_dot_dot_acoustic[iglob] = tex1Dfetch(tex_potential_dot_dot_acoustic, iglob)
+ - (fac1*temp1l + fac2*temp2l + fac3*temp3l);
+#else
+ /* OLD version that uses coloring to get around race condition. About 1.6x faster */
+ // d_accel[iglob*3] -= (fac1*tempx1l + fac2*tempx2l + fac3*tempx3l);
+ // d_accel[iglob*3 + 1] -= (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l);
+ // d_accel[iglob*3 + 2] -= (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
+
+ atomicAdd(&d_potential_dot_dot_acoustic[iglob],-(fac1*temp1l + fac2*temp2l + fac3*temp3l));
+
+#endif
+ }
+
+#else // of #ifndef MAKE_KERNEL2_BECOME_STUPID_FOR_TESTS
+ d_potential_dot_dot_acoustic[iglob] = 123.123f;
+#endif // of #ifndef MAKE_KERNEL2_BECOME_STUPID_FOR_TESTS
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+/* KERNEL 3 */
+/* ----------------------------------------------------------------------------------------------- */
+
+
+__global__ void kernel_3_a_acoustic_cuda_device(float* potential_dot_dot_acoustic,
+ int size,
+ float* rmass_acoustic) {
+ int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
+
+ /* because of block and grid sizing problems, there is a small */
+ /* amount of buffer at the end of the calculation */
+ if(id < size) {
+ // multiplies pressure with the inverse of the mass matrix
+ potential_dot_dot_acoustic[id] = potential_dot_dot_acoustic[id]*rmass_acoustic[id];
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void kernel_3_b_acoustic_cuda_device(float* potential_dot_acoustic,
+ float* potential_dot_dot_acoustic,
+ int size,
+ real deltatover2,
+ float* rmass_acoustic) {
+ int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
+
+ /* because of block and grid sizing problems, there is a small */
+ /* amount of buffer at the end of the calculation */
+ if(id < size) {
+ // Newmark time scheme: corrector term
+ potential_dot_acoustic[id] = potential_dot_acoustic[id] + deltatover2*potential_dot_dot_acoustic[id];
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(kernel_3_a_acoustic_cuda,KERNEL_3_ACOUSTIC_CUDA)(
+ long* Mesh_pointer,
+ int* size_F,
+ int* SIMULATION_TYPE) {
+
+TRACE("kernel_3_a_acoustic_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper
+ int size = *size_F;
+
+ int blocksize=128;
+ int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
+ int num_blocks_x = size_padded/blocksize;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ kernel_3_a_acoustic_cuda_device<<< grid, threads>>>(mp->d_potential_dot_dot_acoustic,
+ size,
+ mp->d_rmass_acoustic);
+
+ if(*SIMULATION_TYPE == 3) {
+ kernel_3_a_acoustic_cuda_device<<< grid, threads>>>(mp->d_b_potential_dot_dot_acoustic,
+ size,
+ mp->d_rmass_acoustic);
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
+ exit_on_cuda_error("after kernel 3 a");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(kernel_3_b_acoustic_cuda,KERNEL_3_ACOUSTIC_CUDA)(
+ long* Mesh_pointer,
+ int* size_F,
+ float* deltatover2_F,
+ int* SIMULATION_TYPE,
+ float* b_deltatover2_F) {
+
+TRACE("kernel_3_b_acoustic_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper
+ int size = *size_F;
+ real deltatover2 = *deltatover2_F;
+ real b_deltatover2 = *b_deltatover2_F;
+
+ int blocksize=128;
+ int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
+ int num_blocks_x = size_padded/blocksize;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ kernel_3_b_acoustic_cuda_device<<< grid, threads>>>(mp->d_potential_dot_acoustic,
+ mp->d_potential_dot_dot_acoustic,
+ size, deltatover2,
+ mp->d_rmass_acoustic);
+
+ if(*SIMULATION_TYPE == 3) {
+ kernel_3_b_acoustic_cuda_device<<< grid, threads>>>(mp->d_b_potential_dot_acoustic,
+ mp->d_b_potential_dot_dot_acoustic,
+ size, b_deltatover2,
+ mp->d_rmass_acoustic);
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
+ exit_on_cuda_error("after kernel 3 b");
+#endif
+}
+
+
+
+/* ----------------------------------------------------------------------------------------------- */
+/* KERNEL for enforce free surface */
+/* ----------------------------------------------------------------------------------------------- */
+
+#define INDEX(i,j,k,ispec) i + (j)*5 + (k)*25 + (ispec)*125
+#define INDEX_IJK(x,y,z) x + (y)*3 + (z)*3*25
+
+__global__ void enforce_free_surface_cuda_kernel(
+ float* potential_acoustic,
+ float* potential_dot_acoustic,
+ float* potential_dot_dot_acoustic,
+ int num_free_surface_faces,
+ int* free_surface_ispec,
+ int* free_surface_ijk,
+ int* ibool,
+ int* ispec_is_acoustic) {
+ // gets spectral element face id
+ int iface = blockIdx.x + gridDim.x*blockIdx.y;
+
+ // for all faces on free surface
+ if( iface < num_free_surface_faces ){
+
+ int ispec = free_surface_ispec[iface]-1;
+
+//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+// if( iface > 648-1 ){printf("device iface: %i \n",iface);}
+//#endif
+
+ // checks if element is in acoustic domain
+ if( ispec_is_acoustic[ispec] == 1 ){
+
+ // gets global point index
+ int tx = threadIdx.x + threadIdx.y*blockDim.x;
+
+//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+// if( tx > 25-1 ){printf("device tx: %i \n",tx);}
+//#endif
+
+ int i = free_surface_ijk[INDEX_IJK(0,tx,iface)] - 1;
+ int j = free_surface_ijk[INDEX_IJK(1,tx,iface)] - 1;
+ int k = free_surface_ijk[INDEX_IJK(2,tx,iface)] - 1;
+
+ int iglob = ibool[INDEX(i,j,k,ispec)] - 1;
+
+ // sets potentials to zero at free surface
+ potential_acoustic[iglob] = 0;
+ potential_dot_acoustic[iglob] = 0;
+ potential_dot_dot_acoustic[iglob] = 0;
+
+//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+// if( ispec == 160 && tx < 25 ){printf("device: %i %i %i %i %i \n",tx,i,j,k,iglob);}
+//#endif
+ }
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(acoustic_enforce_free_surface_cuda,
+ ACOUSTIC_ENFORCE_FREE_SURFACE_CUDA)(long* Mesh_pointer_f,
+ int* SIMULATION_TYPE,
+ int* ABSORB_FREE_SURFACE) {
+
+TRACE("acoustic_enforce_free_surface_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ // checks if anything to do
+ if( *ABSORB_FREE_SURFACE == 0 ){
+ // block sizes
+ int num_blocks_x = mp->num_free_surface_faces;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+ dim3 grid(num_blocks_x,num_blocks_y,1);
+ dim3 threads(25,1,1);
+
+ //#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ // debugging
+ //int* d_debug;
+ //printf("acoustic_enforce_free_surface_cuda ...\n");
+ //print_CUDA_error_if_any(cudaMalloc((void**)&d_debug,128*sizeof(int)),999);
+
+ //int* h_debug;
+ //h_debug = (int*) calloc(128,sizeof(int));
+ //for(int i=0;i<128;i++){h_debug[i] = 0;}
+ //cudaMemcpy(d_debug,h_debug,128*sizeof(int),cudaMemcpyHostToDevice);
+
+ //printf("acoustic_enforce_free_surface_cuda start...\n");
+ //doesnt' work...: printf("free_surface_ispec: %i %i %i \n",mp->d_free_surface_ispec[0],mp->d_free_surface_ispec[1],mp->d_free_surface_ispec[2]);
+ //printf("free_surface_ispec: %i \n",mp->num_free_surface_faces);
+
+ //cudaThreadSynchronize();
+ //#endif
+
+ // sets potentials to zero at free surface
+ enforce_free_surface_cuda_kernel<<<grid,threads>>>(mp->d_potential_acoustic,
+ mp->d_potential_dot_acoustic,
+ mp->d_potential_dot_dot_acoustic,
+ mp->num_free_surface_faces,
+ mp->d_free_surface_ispec,
+ mp->d_free_surface_ijk,
+ mp->d_ibool,
+ mp->d_ispec_is_acoustic);
+ // for backward/reconstructed potentials
+ if(*SIMULATION_TYPE == 3) {
+ enforce_free_surface_cuda_kernel<<<grid,threads>>>(mp->d_b_potential_acoustic,
+ mp->d_b_potential_dot_acoustic,
+ mp->d_b_potential_dot_dot_acoustic,
+ mp->num_free_surface_faces,
+ mp->d_free_surface_ispec,
+ mp->d_free_surface_ijk,
+ mp->d_ibool,
+ mp->d_ispec_is_acoustic);
+
+ }
+ //#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ //cudaThreadSynchronize();
+ //cudaMemcpy(h_debug,d_debug,128*sizeof(int),cudaMemcpyDeviceToHost);
+ //for(int i=0;i<25;i++) {printf("ispec d_debug = %d \n",h_debug[i]);}
+ //cudaFree(d_debug);
+ //free(h_debug);
+ //exit(1);
+ //#endif
+
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("enforce_free_surface_cuda");
+#endif
+}
+
Copied: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_elastic_cuda.cu (from rev 19012, seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic_cuda.cu)
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_elastic_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_elastic_cuda.cu 2011-10-06 03:31:24 UTC (rev 19027)
@@ -0,0 +1,1037 @@
+#include <stdio.h>
+#include <cuda.h>
+#include <cublas.h>
+#include <mpi.h>
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+// #include "epik_user.h"
+
+
+// cuda constant arrays
+__constant__ float d_hprime_xx[NGLL2];
+__constant__ float d_hprime_yy[NGLL2]; // daniel: check if hprime_yy == hprime_xx
+__constant__ float d_hprime_zz[NGLL2]; // daniel: check if hprime_zz == hprime_xx
+__constant__ float d_hprimewgll_xx[NGLL2];
+__constant__ float d_wgllwgll_xy[NGLL2];
+__constant__ float d_wgllwgll_xz[NGLL2];
+__constant__ float d_wgllwgll_yz[NGLL2];
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+void Kernel_2(int nb_blocks_to_compute, Mesh* mp, int d_iphase,
+ int COMPUTE_AND_STORE_STRAIN,int SIMULATION_TYPE);
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// prepares a device array with with all inter-element edge-nodes -- this
+// is followed by a memcpy and MPI operations
+__global__ void prepare_boundary_accel_on_device(float* d_accel, float* d_send_accel_buffer,
+ int num_interfaces_ext_mesh, int max_nibool_interfaces_ext_mesh,
+ int* d_nibool_interfaces_ext_mesh,
+ int* d_ibool_interfaces_ext_mesh) {
+
+ int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
+ //int bx = blockIdx.y*gridDim.x+blockIdx.x;
+ //int tx = threadIdx.x;
+ int iinterface=0;
+
+ for( iinterface=0; iinterface < num_interfaces_ext_mesh; iinterface++) {
+ if(id<d_nibool_interfaces_ext_mesh[iinterface]) {
+ d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)] =
+ d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)];
+ d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+1] =
+ d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+1];
+ d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+2] =
+ d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+2];
+ }
+ }
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// prepares and transfers the inter-element edge-nodes to the host to be MPI'd
+extern "C"
+void FC_FUNC_(transfer_boundary_accel_from_device,
+ TRANSFER_BOUNDARY_ACCEL_FROM_DEVICE)(int* size, long* Mesh_pointer_f, float* accel,
+ float* send_accel_buffer,
+ int* num_interfaces_ext_mesh,
+ int* max_nibool_interfaces_ext_mesh,
+ int* nibool_interfaces_ext_mesh,
+ int* ibool_interfaces_ext_mesh,
+ int* FORWARD_OR_ADJOINT){
+TRACE("transfer_boundary_accel_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+
+
+ int blocksize = 256;
+ int size_padded = ((int)ceil(((double)*max_nibool_interfaces_ext_mesh)/((double)blocksize)))*blocksize;
+ int num_blocks_x = size_padded/blocksize;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ //timing for memory xfer
+ // cudaEvent_t start, stop;
+ // float time;
+ // cudaEventCreate(&start);
+ // cudaEventCreate(&stop);
+ // cudaEventRecord( start, 0 );
+ if(*FORWARD_OR_ADJOINT == 1) {
+ prepare_boundary_accel_on_device<<<grid,threads>>>(mp->d_accel,mp->d_send_accel_buffer,
+ *num_interfaces_ext_mesh,
+ *max_nibool_interfaces_ext_mesh,
+ mp->d_nibool_interfaces_ext_mesh,
+ mp->d_ibool_interfaces_ext_mesh);
+ }
+ else if(*FORWARD_OR_ADJOINT == 3) {
+ prepare_boundary_accel_on_device<<<grid,threads>>>(mp->d_b_accel,mp->d_send_accel_buffer,
+ *num_interfaces_ext_mesh,
+ *max_nibool_interfaces_ext_mesh,
+ mp->d_nibool_interfaces_ext_mesh,
+ mp->d_ibool_interfaces_ext_mesh);
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("transfer_boundary_accel_from_device");
+#endif
+
+
+ cudaMemcpy(send_accel_buffer,mp->d_send_accel_buffer,
+ 3* *max_nibool_interfaces_ext_mesh* *num_interfaces_ext_mesh*sizeof(real),cudaMemcpyDeviceToHost);
+
+ // finish timing of kernel+memcpy
+ // cudaEventRecord( stop, 0 );
+ // cudaEventSynchronize( stop );
+ // cudaEventElapsedTime( &time, start, stop );
+ // cudaEventDestroy( start );
+ // cudaEventDestroy( stop );
+ // printf("boundary xfer d->h Time: %f ms\n",time);
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void assemble_boundary_accel_on_device(float* d_accel, float* d_send_accel_buffer,
+ int num_interfaces_ext_mesh, int max_nibool_interfaces_ext_mesh,
+ int* d_nibool_interfaces_ext_mesh,
+ int* d_ibool_interfaces_ext_mesh) {
+
+ int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
+ //int bx = blockIdx.y*gridDim.x+blockIdx.x;
+ //int tx = threadIdx.x;
+ int iinterface=0;
+
+ for( iinterface=0; iinterface < num_interfaces_ext_mesh; iinterface++) {
+ if(id<d_nibool_interfaces_ext_mesh[iinterface]) {
+
+ // for testing atomic operations against not atomic operations (0.1ms vs. 0.04 ms)
+ // d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)] +=
+ // d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)];
+ // d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+1] +=
+ // d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+1];
+ // d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+2] +=
+ // d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+2];
+
+
+ atomicAdd(&d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)],
+ d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)]);
+ atomicAdd(&d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+1],
+ d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+1]);
+ atomicAdd(&d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+2],
+ d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+2]);
+ }
+ }
+ // ! This step is done via previous function transfer_and_assemble...
+ // ! do iinterface = 1, num_interfaces_ext_mesh
+ // ! do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
+ // ! array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface)) = &
+ // ! array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface)) + buffer_recv_vector_ext_mesh(:,ipoin,iinterface)
+ // ! enddo
+ // ! enddo
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// FORWARD_OR_ADJOINT == 1 for accel, and == 3 for b_accel
+extern "C"
+void FC_FUNC_(transfer_and_assemble_accel_to_device,
+ TRANSFER_AND_ASSEMBLE_ACCEL_TO_DEVICE)(long* Mesh_pointer, real* accel,
+ real* buffer_recv_vector_ext_mesh,
+ int* num_interfaces_ext_mesh,
+ int* max_nibool_interfaces_ext_mesh,
+ int* nibool_interfaces_ext_mesh,
+ int* ibool_interfaces_ext_mesh,int* FORWARD_OR_ADJOINT) {
+TRACE("transfer_and_assemble_accel_to_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+
+ cudaMemcpy(mp->d_send_accel_buffer, buffer_recv_vector_ext_mesh, 3* *max_nibool_interfaces_ext_mesh* *num_interfaces_ext_mesh*sizeof(real), cudaMemcpyHostToDevice);
+
+ int blocksize = 256;
+ int size_padded = ((int)ceil(((double)*max_nibool_interfaces_ext_mesh)/((double)blocksize)))*blocksize;
+ int num_blocks_x = size_padded/blocksize;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ //double start_time = get_time();
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+ // cudaEvent_t start, stop;
+ // float time;
+ // cudaEventCreate(&start);
+ // cudaEventCreate(&stop);
+ // cudaEventRecord( start, 0 );
+ if(*FORWARD_OR_ADJOINT == 1) { //assemble forward accel
+ assemble_boundary_accel_on_device<<<grid,threads>>>(mp->d_accel, mp->d_send_accel_buffer,
+ *num_interfaces_ext_mesh,
+ *max_nibool_interfaces_ext_mesh,
+ mp->d_nibool_interfaces_ext_mesh,
+ mp->d_ibool_interfaces_ext_mesh);
+ }
+ else if(*FORWARD_OR_ADJOINT == 3) { //assemble adjoint accel
+ assemble_boundary_accel_on_device<<<grid,threads>>>(mp->d_b_accel, mp->d_send_accel_buffer,
+ *num_interfaces_ext_mesh,
+ *max_nibool_interfaces_ext_mesh,
+ mp->d_nibool_interfaces_ext_mesh,
+ mp->d_ibool_interfaces_ext_mesh);
+ }
+
+ // cudaEventRecord( stop, 0 );
+ // cudaEventSynchronize( stop );
+ // cudaEventElapsedTime( &time, start, stop );
+ // cudaEventDestroy( start );
+ // cudaEventDestroy( stop );
+ // printf("Boundary Assemble Kernel Execution Time: %f ms\n",time);
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ //double end_time = get_time();
+ //printf("Elapsed time: %e\n",end_time-start_time);
+ exit_on_cuda_error("transfer_and_assemble_accel_to_device_");
+#endif
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+extern "C"
+void FC_FUNC_(compute_forces_elastic_cuda,
+ COMPUTE_FORCES_ELASTIC_CUDA)(long* Mesh_pointer_f,
+ int* iphase,
+ int* nspec_outer_elastic,
+ int* nspec_inner_elastic,
+ int* COMPUTE_AND_STORE_STRAIN,
+ int* SIMULATION_TYPE) {
+
+TRACE("compute_forces_elastic_cuda");
+// EPIK_TRACER("compute_forces_elastic_cuda");
+//printf("Running compute_forces\n");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
+
+ int num_elements;
+
+ if( *iphase == 1 )
+ num_elements = *nspec_outer_elastic;
+ else
+ num_elements = *nspec_inner_elastic;
+
+ //int myrank;
+ /* MPI_Comm_rank(MPI_COMM_WORLD,&myrank); */
+ /* if(myrank==0) { */
+
+ Kernel_2(num_elements, mp, *iphase, *COMPUTE_AND_STORE_STRAIN,*SIMULATION_TYPE);
+
+
+ cudaThreadSynchronize();
+/* MPI_Barrier(MPI_COMM_WORLD); */
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void Kernel_test(float* d_debug_output,int* d_phase_ispec_inner_elastic, int num_phase_ispec_elastic, int d_iphase, int* d_ibool);
+
+__global__ void Kernel_2_impl(int nb_blocks_to_compute,int NGLOB, int* d_ibool,int* d_phase_ispec_inner_elastic, int num_phase_ispec_elastic, int d_iphase,float* d_displ, float* d_accel, float* d_xix, float* d_xiy, float* d_xiz, float* d_etax, float* d_etay, float* d_etaz, float* d_gammax, float* d_gammay, float* d_gammaz, float* d_kappav, float* d_muv,float* d_debug,int COMPUTE_AND_STORE_STRAIN,float* epsilondev_xx,float* epsilondev_yy,float* epsilondev_xy,float* epsilondev_xz,float* epsilondev_yz,float* epsilon_trace_over_3,int SIMULATION_TYPE);
+
+/* ----------------------------------------------------------------------------------------------- */
+
+void Kernel_2(int nb_blocks_to_compute, Mesh* mp, int d_iphase,
+ int COMPUTE_AND_STORE_STRAIN,int SIMULATION_TYPE)
+ {
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("before kernel Kernel 2");
+#endif
+
+ /* if the grid can handle the number of blocks, we let it be 1D */
+ /* grid_2_x = nb_elem_color; */
+ /* nb_elem_color is just how many blocks we are computing now */
+
+ int num_blocks_x = nb_blocks_to_compute;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ int threads_2 = 128;//BLOCK_SIZE_K2;
+ dim3 grid_2(num_blocks_x,num_blocks_y);
+
+ // debugging
+ //printf("Starting with grid %dx%d for %d blocks\n",num_blocks_x,num_blocks_y,nb_blocks_to_compute);
+ float* d_debug, *h_debug;
+ h_debug = (float*)calloc(128,sizeof(float));
+ cudaMalloc((void**)&d_debug,128*sizeof(float));
+ cudaMemcpy(d_debug,h_debug,128*sizeof(float),cudaMemcpyHostToDevice);
+
+ // Cuda timing
+ // cudaEvent_t start, stop;
+ // float time;
+ // cudaEventCreate(&start);
+ // cudaEventCreate(&stop);
+ // cudaEventRecord( start, 0 );
+
+ Kernel_2_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,mp->NGLOB_AB, mp->d_ibool,
+ mp->d_phase_ispec_inner_elastic,
+ mp->d_num_phase_ispec_elastic, d_iphase,
+ mp->d_displ, mp->d_accel,
+ mp->d_xix, mp->d_xiy, mp->d_xiz,
+ mp->d_etax, mp->d_etay, mp->d_etaz,
+ mp->d_gammax, mp->d_gammay, mp->d_gammaz,
+ mp->d_kappav, mp->d_muv,d_debug,
+ COMPUTE_AND_STORE_STRAIN,
+ mp->d_epsilondev_xx,
+ mp->d_epsilondev_yy,
+ mp->d_epsilondev_xy,
+ mp->d_epsilondev_xz,
+ mp->d_epsilondev_yz,
+ mp->d_epsilon_trace_over_3,
+ // 1);
+ SIMULATION_TYPE);
+
+
+ // cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
+ // int procid;
+ // MPI_Comm_rank(MPI_COMM_WORLD,&procid);
+ // if(procid==0) {
+ // for(int i=0;i<17;i++) {
+ // printf("cudadebug[%d] = %e\n",i,h_debug[i]);
+ // }
+ // }
+ free(h_debug);
+ cudaFree(d_debug);
+ #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("Kernel_2_impl");
+ #endif
+
+ if(SIMULATION_TYPE == 3) {
+ Kernel_2_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,mp->NGLOB_AB, mp->d_ibool,
+ mp->d_phase_ispec_inner_elastic,
+ mp->d_num_phase_ispec_elastic, d_iphase,
+ mp->d_b_displ, mp->d_b_accel,
+ mp->d_xix, mp->d_xiy, mp->d_xiz,
+ mp->d_etax, mp->d_etay, mp->d_etaz,
+ mp->d_gammax, mp->d_gammay, mp->d_gammaz,
+ mp->d_kappav, mp->d_muv,d_debug,
+ COMPUTE_AND_STORE_STRAIN,
+ mp->d_b_epsilondev_xx,
+ mp->d_b_epsilondev_yy,
+ mp->d_b_epsilondev_xy,
+ mp->d_b_epsilondev_xz,
+ mp->d_b_epsilondev_yz,
+ mp->d_b_epsilon_trace_over_3,
+ SIMULATION_TYPE);
+ }
+
+ // cudaEventRecord( stop, 0 );
+ // cudaEventSynchronize( stop );
+ // cudaEventElapsedTime( &time, start, stop );
+ // cudaEventDestroy( start );
+ // cudaEventDestroy( stop );
+ // printf("Kernel2 Execution Time: %f ms\n",time);
+
+ // cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
+ // for(int i=0;i<10;i++) {
+ // printf("debug[%d]=%e\n",i,h_debug[i]);
+ // }
+
+ /* cudaThreadSynchronize(); */
+ /* LOG("Kernel 2 finished"); */
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("Kernel_2_impl SIM_TYPE==3");
+ #endif
+
+ }
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void Kernel_test(float* d_debug_output,int* d_phase_ispec_inner_elastic, int num_phase_ispec_elastic, int d_iphase, int* d_ibool) {
+ int bx = blockIdx.x;
+ int tx = threadIdx.x;
+ int working_element;
+ //int ispec;
+ //int NGLL3_ALIGN = 128;
+ if(tx==0 && bx==0) {
+
+ d_debug_output[0] = 420.0;
+
+ d_debug_output[2] = num_phase_ispec_elastic;
+ d_debug_output[3] = d_iphase;
+ working_element = d_phase_ispec_inner_elastic[bx + num_phase_ispec_elastic*(d_iphase-1)]-1;
+ d_debug_output[4] = working_element;
+ d_debug_output[5] = d_phase_ispec_inner_elastic[0];
+ /* d_debug_output[1] = d_ibool[working_element*NGLL3_ALIGN + tx]-1; */
+ }
+ /* d_debug_output[1+tx+128*bx] = 69.0; */
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// double precision temporary variables leads to 10% performance
+// decrease in Kernel_2_impl (not very much..)
+//typedef float reald;
+
+// doesn't seem to change the performance.
+// #define MANUALLY_UNROLLED_LOOPS
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void Kernel_2_impl(int nb_blocks_to_compute,int NGLOB, int* d_ibool,int* d_phase_ispec_inner_elastic, int num_phase_ispec_elastic, int d_iphase,float* d_displ, float* d_accel, float* d_xix, float* d_xiy, float* d_xiz, float* d_etax, float* d_etay, float* d_etaz, float* d_gammax, float* d_gammay, float* d_gammaz, float* d_kappav, float* d_muv,float* d_debug,int COMPUTE_AND_STORE_STRAIN,float* epsilondev_xx,float* epsilondev_yy,float* epsilondev_xy,float* epsilondev_xz,float* epsilondev_yz,float* epsilon_trace_over_3,int SIMULATION_TYPE)
+{
+
+ /* int bx = blockIdx.y*blockDim.x+blockIdx.x; //possible bug in original code*/
+ int bx = blockIdx.y*gridDim.x+blockIdx.x;
+ /* int bx = blockIdx.x; */
+ int tx = threadIdx.x;
+
+
+
+ //const int NGLLX = 5;
+ // const int NGLL2 = 25;
+ const int NGLL3 = 125;
+ const int NGLL3_ALIGN = 128;
+
+ int K = (tx/NGLL2);
+ int J = ((tx-K*NGLL2)/NGLLX);
+ int I = (tx-K*NGLL2-J*NGLLX);
+
+ int active,offset;
+ int iglob = 0;
+ int working_element;
+ reald tempx1l,tempx2l,tempx3l,tempy1l,tempy2l,tempy3l,tempz1l,tempz2l,tempz3l;
+ reald xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl;
+ reald duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl;
+ reald duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl;
+ reald duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl;
+ reald fac1,fac2,fac3,lambdal,mul,lambdalplus2mul,kappal;
+ reald sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz;
+
+#ifndef MANUALLY_UNROLLED_LOOPS
+ int l;
+ float hp1,hp2,hp3;
+#endif
+
+ __shared__ reald s_dummyx_loc[NGLL3];
+ __shared__ reald s_dummyy_loc[NGLL3];
+ __shared__ reald s_dummyz_loc[NGLL3];
+
+ __shared__ reald s_tempx1[NGLL3];
+ __shared__ reald s_tempx2[NGLL3];
+ __shared__ reald s_tempx3[NGLL3];
+ __shared__ reald s_tempy1[NGLL3];
+ __shared__ reald s_tempy2[NGLL3];
+ __shared__ reald s_tempy3[NGLL3];
+ __shared__ reald s_tempz1[NGLL3];
+ __shared__ reald s_tempz2[NGLL3];
+ __shared__ reald s_tempz3[NGLL3];
+
+// use only NGLL^3 = 125 active threads, plus 3 inactive/ghost threads,
+// because we used memory padding from NGLL^3 = 125 to 128 to get coalescent memory accesses
+ active = (tx < NGLL3 && bx < nb_blocks_to_compute) ? 1:0;
+
+// copy from global memory to shared memory
+// each thread writes one of the NGLL^3 = 125 data points
+ if (active) {
+ // iphase-1 and working_element-1 for Fortran->C array conventions
+ working_element = d_phase_ispec_inner_elastic[bx + num_phase_ispec_elastic*(d_iphase-1)]-1;
+ // iglob = d_ibool[working_element*NGLL3_ALIGN + tx]-1;
+ iglob = d_ibool[working_element*125 + tx]-1;
+
+#ifdef USE_TEXTURES
+ s_dummyx_loc[tx] = tex1Dfetch(tex_displ, iglob);
+ s_dummyy_loc[tx] = tex1Dfetch(tex_displ, iglob + NGLOB);
+ s_dummyz_loc[tx] = tex1Dfetch(tex_displ, iglob + 2*NGLOB);
+#else
+ // changing iglob indexing to match fortran row changes fast style
+ s_dummyx_loc[tx] = d_displ[iglob*3];
+ s_dummyy_loc[tx] = d_displ[iglob*3 + 1];
+ s_dummyz_loc[tx] = d_displ[iglob*3 + 2];
+#endif
+ }
+
+// synchronize all the threads (one thread for each of the NGLL grid points of the
+// current spectral element) because we need the whole element to be ready in order
+// to be able to compute the matrix products along cut planes of the 3D element below
+ __syncthreads();
+
+#ifndef MAKE_KERNEL2_BECOME_STUPID_FOR_TESTS
+
+ if (active) {
+
+#ifndef MANUALLY_UNROLLED_LOOPS
+
+ tempx1l = 0.f;
+ tempx2l = 0.f;
+ tempx3l = 0.f;
+
+ tempy1l = 0.f;
+ tempy2l = 0.f;
+ tempy3l = 0.f;
+
+ tempz1l = 0.f;
+ tempz2l = 0.f;
+ tempz3l = 0.f;
+
+ for (l=0;l<NGLLX;l++) {
+ hp1 = d_hprime_xx[l*NGLLX+I];
+ offset = K*NGLL2+J*NGLLX+l;
+ tempx1l += s_dummyx_loc[offset]*hp1;
+ tempy1l += s_dummyy_loc[offset]*hp1;
+ tempz1l += s_dummyz_loc[offset]*hp1;
+
+ hp2 = d_hprime_xx[l*NGLLX+J];
+ offset = K*NGLL2+l*NGLLX+I;
+ tempx2l += s_dummyx_loc[offset]*hp2;
+ tempy2l += s_dummyy_loc[offset]*hp2;
+ tempz2l += s_dummyz_loc[offset]*hp2;
+
+ hp3 = d_hprime_xx[l*NGLLX+K];
+ offset = l*NGLL2+J*NGLLX+I;
+ tempx3l += s_dummyx_loc[offset]*hp3;
+ tempy3l += s_dummyy_loc[offset]*hp3;
+ tempz3l += s_dummyz_loc[offset]*hp3;
+
+ // if(working_element == 169 && tx == 0) {
+ // atomicAdd(&d_debug[0],1.0);
+ // d_debug[1+3*l] = tempz3l;
+ // d_debug[2+3*l] = s_dummyz_loc[offset];
+ // d_debug[3+3*l] = hp3;
+ // }
+
+ }
+#else
+
+ tempx1l = s_dummyx_loc[K*NGLL2+J*NGLLX]*d_hprime_xx[I]
+ + s_dummyx_loc[K*NGLL2+J*NGLLX+1]*d_hprime_xx[NGLLX+I]
+ + s_dummyx_loc[K*NGLL2+J*NGLLX+2]*d_hprime_xx[2*NGLLX+I]
+ + s_dummyx_loc[K*NGLL2+J*NGLLX+3]*d_hprime_xx[3*NGLLX+I]
+ + s_dummyx_loc[K*NGLL2+J*NGLLX+4]*d_hprime_xx[4*NGLLX+I];
+
+ tempy1l = s_dummyy_loc[K*NGLL2+J*NGLLX]*d_hprime_xx[I]
+ + s_dummyy_loc[K*NGLL2+J*NGLLX+1]*d_hprime_xx[NGLLX+I]
+ + s_dummyy_loc[K*NGLL2+J*NGLLX+2]*d_hprime_xx[2*NGLLX+I]
+ + s_dummyy_loc[K*NGLL2+J*NGLLX+3]*d_hprime_xx[3*NGLLX+I]
+ + s_dummyy_loc[K*NGLL2+J*NGLLX+4]*d_hprime_xx[4*NGLLX+I];
+
+ tempz1l = s_dummyz_loc[K*NGLL2+J*NGLLX]*d_hprime_xx[I]
+ + s_dummyz_loc[K*NGLL2+J*NGLLX+1]*d_hprime_xx[NGLLX+I]
+ + s_dummyz_loc[K*NGLL2+J*NGLLX+2]*d_hprime_xx[2*NGLLX+I]
+ + s_dummyz_loc[K*NGLL2+J*NGLLX+3]*d_hprime_xx[3*NGLLX+I]
+ + s_dummyz_loc[K*NGLL2+J*NGLLX+4]*d_hprime_xx[4*NGLLX+I];
+
+ tempx2l = s_dummyx_loc[K*NGLL2+I]*d_hprime_xx[J]
+ + s_dummyx_loc[K*NGLL2+NGLLX+I]*d_hprime_xx[NGLLX+J]
+ + s_dummyx_loc[K*NGLL2+2*NGLLX+I]*d_hprime_xx[2*NGLLX+J]
+ + s_dummyx_loc[K*NGLL2+3*NGLLX+I]*d_hprime_xx[3*NGLLX+J]
+ + s_dummyx_loc[K*NGLL2+4*NGLLX+I]*d_hprime_xx[4*NGLLX+J];
+
+ tempy2l = s_dummyy_loc[K*NGLL2+I]*d_hprime_xx[J]
+ + s_dummyy_loc[K*NGLL2+NGLLX+I]*d_hprime_xx[NGLLX+J]
+ + s_dummyy_loc[K*NGLL2+2*NGLLX+I]*d_hprime_xx[2*NGLLX+J]
+ + s_dummyy_loc[K*NGLL2+3*NGLLX+I]*d_hprime_xx[3*NGLLX+J]
+ + s_dummyy_loc[K*NGLL2+4*NGLLX+I]*d_hprime_xx[4*NGLLX+J];
+
+ tempz2l = s_dummyz_loc[K*NGLL2+I]*d_hprime_xx[J]
+ + s_dummyz_loc[K*NGLL2+NGLLX+I]*d_hprime_xx[NGLLX+J]
+ + s_dummyz_loc[K*NGLL2+2*NGLLX+I]*d_hprime_xx[2*NGLLX+J]
+ + s_dummyz_loc[K*NGLL2+3*NGLLX+I]*d_hprime_xx[3*NGLLX+J]
+ + s_dummyz_loc[K*NGLL2+4*NGLLX+I]*d_hprime_xx[4*NGLLX+J];
+
+ tempx3l = s_dummyx_loc[J*NGLLX+I]*d_hprime_xx[K]
+ + s_dummyx_loc[NGLL2+J*NGLLX+I]*d_hprime_xx[NGLLX+K]
+ + s_dummyx_loc[2*NGLL2+J*NGLLX+I]*d_hprime_xx[2*NGLLX+K]
+ + s_dummyx_loc[3*NGLL2+J*NGLLX+I]*d_hprime_xx[3*NGLLX+K]
+ + s_dummyx_loc[4*NGLL2+J*NGLLX+I]*d_hprime_xx[4*NGLLX+K];
+
+ tempy3l = s_dummyy_loc[J*NGLLX+I]*d_hprime_xx[K]
+ + s_dummyy_loc[NGLL2+J*NGLLX+I]*d_hprime_xx[NGLLX+K]
+ + s_dummyy_loc[2*NGLL2+J*NGLLX+I]*d_hprime_xx[2*NGLLX+K]
+ + s_dummyy_loc[3*NGLL2+J*NGLLX+I]*d_hprime_xx[3*NGLLX+K]
+ + s_dummyy_loc[4*NGLL2+J*NGLLX+I]*d_hprime_xx[4*NGLLX+K];
+
+ tempz3l = s_dummyz_loc[J*NGLLX+I]*d_hprime_xx[K]
+ + s_dummyz_loc[NGLL2+J*NGLLX+I]*d_hprime_xx[NGLLX+K]
+ + s_dummyz_loc[2*NGLL2+J*NGLLX+I]*d_hprime_xx[2*NGLLX+K]
+ + s_dummyz_loc[3*NGLL2+J*NGLLX+I]*d_hprime_xx[3*NGLLX+K]
+ + s_dummyz_loc[4*NGLL2+J*NGLLX+I]*d_hprime_xx[4*NGLLX+K];
+
+#endif
+
+// compute derivatives of ux, uy and uz with respect to x, y and z
+ offset = working_element*NGLL3_ALIGN + tx;
+
+ xixl = d_xix[offset];
+ xiyl = d_xiy[offset];
+ xizl = d_xiz[offset];
+ etaxl = d_etax[offset];
+ etayl = d_etay[offset];
+ etazl = d_etaz[offset];
+ gammaxl = d_gammax[offset];
+ gammayl = d_gammay[offset];
+ gammazl = d_gammaz[offset];
+
+ duxdxl = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l;
+ duxdyl = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l;
+ duxdzl = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l;
+
+ duydxl = xixl*tempy1l + etaxl*tempy2l + gammaxl*tempy3l;
+ duydyl = xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l;
+ duydzl = xizl*tempy1l + etazl*tempy2l + gammazl*tempy3l;
+
+ duzdxl = xixl*tempz1l + etaxl*tempz2l + gammaxl*tempz3l;
+ duzdyl = xiyl*tempz1l + etayl*tempz2l + gammayl*tempz3l;
+ duzdzl = xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l;
+
+
+
+ duxdxl_plus_duydyl = duxdxl + duydyl;
+ duxdxl_plus_duzdzl = duxdxl + duzdzl;
+ duydyl_plus_duzdzl = duydyl + duzdzl;
+ duxdyl_plus_duydxl = duxdyl + duydxl;
+ duzdxl_plus_duxdzl = duzdxl + duxdzl;
+ duzdyl_plus_duydzl = duzdyl + duydzl;
+
+ if(COMPUTE_AND_STORE_STRAIN) {
+ float templ = 1.0f/3.0f * (duxdxl + duydyl + duzdzl);
+ epsilondev_xx[offset] = duxdxl - templ;
+ epsilondev_yy[offset] = duydyl - templ;
+ epsilondev_xy[offset] = 0.5 * duxdyl_plus_duydxl;
+ epsilondev_xz[offset] = 0.5 * duzdxl_plus_duxdzl;
+ epsilondev_yz[offset] = 0.5 * duzdyl_plus_duydzl;
+ if(SIMULATION_TYPE == 3) {
+ epsilon_trace_over_3[tx + working_element*125] = templ;
+ }
+ }
+
+// compute elements with an elastic isotropic rheology
+ kappal = d_kappav[offset];
+ mul = d_muv[offset];
+
+ lambdalplus2mul = kappal + 1.33333333333333333333f * mul; // 4./3. = 1.3333333
+ lambdal = lambdalplus2mul - 2.f*mul;
+
+// compute the six components of the stress tensor sigma
+ sigma_xx = lambdalplus2mul*duxdxl + lambdal*duydyl_plus_duzdzl;
+ sigma_yy = lambdalplus2mul*duydyl + lambdal*duxdxl_plus_duzdzl;
+ sigma_zz = lambdalplus2mul*duzdzl + lambdal*duxdxl_plus_duydyl;
+
+ sigma_xy = mul*duxdyl_plus_duydxl;
+ sigma_xz = mul*duzdxl_plus_duxdzl;
+ sigma_yz = mul*duzdyl_plus_duydzl;
+
+ jacobianl = 1.f / (xixl*(etayl*gammazl-etazl*gammayl)-xiyl*(etaxl*gammazl-etazl*gammaxl)+xizl*(etaxl*gammayl-etayl*gammaxl));
+
+// form the dot product with the test vector
+ s_tempx1[tx] = jacobianl * (sigma_xx*xixl + sigma_xy*xiyl + sigma_xz*xizl);
+ s_tempy1[tx] = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_yz*xizl);
+ s_tempz1[tx] = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl);
+
+ s_tempx2[tx] = jacobianl * (sigma_xx*etaxl + sigma_xy*etayl + sigma_xz*etazl);
+ s_tempy2[tx] = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_yz*etazl);
+ s_tempz2[tx] = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl);
+
+ s_tempx3[tx] = jacobianl * (sigma_xx*gammaxl + sigma_xy*gammayl + sigma_xz*gammazl);
+ s_tempy3[tx] = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_yz*gammazl);
+ s_tempz3[tx] = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl);
+
+ }
+
+// synchronize all the threads (one thread for each of the NGLL grid points of the
+// current spectral element) because we need the whole element to be ready in order
+// to be able to compute the matrix products along cut planes of the 3D element below
+ __syncthreads();
+
+ if (active) {
+
+#ifndef MANUALLY_UNROLLED_LOOPS
+
+ tempx1l = 0.f;
+ tempy1l = 0.f;
+ tempz1l = 0.f;
+
+ tempx2l = 0.f;
+ tempy2l = 0.f;
+ tempz2l = 0.f;
+
+ tempx3l = 0.f;
+ tempy3l = 0.f;
+ tempz3l = 0.f;
+
+ for (l=0;l<NGLLX;l++) {
+
+ fac1 = d_hprimewgll_xx[I*NGLLX+l];
+ offset = K*NGLL2+J*NGLLX+l;
+ tempx1l += s_tempx1[offset]*fac1;
+ tempy1l += s_tempy1[offset]*fac1;
+ tempz1l += s_tempz1[offset]*fac1;
+
+ fac2 = d_hprimewgll_xx[J*NGLLX+l];
+ offset = K*NGLL2+l*NGLLX+I;
+ tempx2l += s_tempx2[offset]*fac2;
+ tempy2l += s_tempy2[offset]*fac2;
+ tempz2l += s_tempz2[offset]*fac2;
+
+ fac3 = d_hprimewgll_xx[K*NGLLX+l];
+ offset = l*NGLL2+J*NGLLX+I;
+ tempx3l += s_tempx3[offset]*fac3;
+ tempy3l += s_tempy3[offset]*fac3;
+ tempz3l += s_tempz3[offset]*fac3;
+
+ if(working_element == 169)
+ if(l==0)
+ if(I+J+K == 0) {
+ // atomicAdd(&d_debug[0],1.0);
+ // d_debug[0] = fac3;
+ // d_debug[1] = offset;
+ // d_debug[2] = s_tempz3[offset];
+ }
+ }
+#else
+
+ tempx1l = s_tempx1[K*NGLL2+J*NGLLX]*d_hprimewgll_xx[I*NGLLX]
+ + s_tempx1[K*NGLL2+J*NGLLX+1]*d_hprimewgll_xx[I*NGLLX+1]
+ + s_tempx1[K*NGLL2+J*NGLLX+2]*d_hprimewgll_xx[I*NGLLX+2]
+ + s_tempx1[K*NGLL2+J*NGLLX+3]*d_hprimewgll_xx[I*NGLLX+3]
+ + s_tempx1[K*NGLL2+J*NGLLX+4]*d_hprimewgll_xx[I*NGLLX+4];
+
+ tempy1l = s_tempy1[K*NGLL2+J*NGLLX]*d_hprimewgll_xx[I*NGLLX]
+ + s_tempy1[K*NGLL2+J*NGLLX+1]*d_hprimewgll_xx[I*NGLLX+1]
+ + s_tempy1[K*NGLL2+J*NGLLX+2]*d_hprimewgll_xx[I*NGLLX+2]
+ + s_tempy1[K*NGLL2+J*NGLLX+3]*d_hprimewgll_xx[I*NGLLX+3]
+ + s_tempy1[K*NGLL2+J*NGLLX+4]*d_hprimewgll_xx[I*NGLLX+4];
+
+ tempz1l = s_tempz1[K*NGLL2+J*NGLLX]*d_hprimewgll_xx[I*NGLLX]
+ + s_tempz1[K*NGLL2+J*NGLLX+1]*d_hprimewgll_xx[I*NGLLX+1]
+ + s_tempz1[K*NGLL2+J*NGLLX+2]*d_hprimewgll_xx[I*NGLLX+2]
+ + s_tempz1[K*NGLL2+J*NGLLX+3]*d_hprimewgll_xx[I*NGLLX+3]
+ + s_tempz1[K*NGLL2+J*NGLLX+4]*d_hprimewgll_xx[I*NGLLX+4];
+
+ tempx2l = s_tempx2[K*NGLL2+I]*d_hprimewgll_xx[J*NGLLX]
+ + s_tempx2[K*NGLL2+NGLLX+I]*d_hprimewgll_xx[J*NGLLX+1]
+ + s_tempx2[K*NGLL2+2*NGLLX+I]*d_hprimewgll_xx[J*NGLLX+2]
+ + s_tempx2[K*NGLL2+3*NGLLX+I]*d_hprimewgll_xx[J*NGLLX+3]
+ + s_tempx2[K*NGLL2+4*NGLLX+I]*d_hprimewgll_xx[J*NGLLX+4];
+
+ tempy2l = s_tempy2[K*NGLL2+I]*d_hprimewgll_xx[J*NGLLX]
+ + s_tempy2[K*NGLL2+NGLLX+I]*d_hprimewgll_xx[J*NGLLX+1]
+ + s_tempy2[K*NGLL2+2*NGLLX+I]*d_hprimewgll_xx[J*NGLLX+2]
+ + s_tempy2[K*NGLL2+3*NGLLX+I]*d_hprimewgll_xx[J*NGLLX+3]
+ + s_tempy2[K*NGLL2+4*NGLLX+I]*d_hprimewgll_xx[J*NGLLX+4];
+
+ tempz2l = s_tempz2[K*NGLL2+I]*d_hprimewgll_xx[J*NGLLX]
+ + s_tempz2[K*NGLL2+NGLLX+I]*d_hprimewgll_xx[J*NGLLX+1]
+ + s_tempz2[K*NGLL2+2*NGLLX+I]*d_hprimewgll_xx[J*NGLLX+2]
+ + s_tempz2[K*NGLL2+3*NGLLX+I]*d_hprimewgll_xx[J*NGLLX+3]
+ + s_tempz2[K*NGLL2+4*NGLLX+I]*d_hprimewgll_xx[J*NGLLX+4];
+
+ tempx3l = s_tempx3[J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX]
+ + s_tempx3[NGLL2+J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX+1]
+ + s_tempx3[2*NGLL2+J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX+2]
+ + s_tempx3[3*NGLL2+J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX+3]
+ + s_tempx3[4*NGLL2+J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX+4];
+
+ tempy3l = s_tempy3[J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX]
+ + s_tempy3[NGLL2+J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX+1]
+ + s_tempy3[2*NGLL2+J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX+2]
+ + s_tempy3[3*NGLL2+J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX+3]
+ + s_tempy3[4*NGLL2+J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX+4];
+
+ tempz3l = s_tempz3[J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX]
+ + s_tempz3[NGLL2+J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX+1]
+ + s_tempz3[2*NGLL2+J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX+2]
+ + s_tempz3[3*NGLL2+J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX+3]
+ + s_tempz3[4*NGLL2+J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX+4];
+
+#endif
+
+ fac1 = d_wgllwgll_yz[K*NGLLX+J];
+ fac2 = d_wgllwgll_xz[K*NGLLX+I];
+ fac3 = d_wgllwgll_xy[J*NGLLX+I];
+
+#ifdef USE_TEXTURES
+ d_accel[iglob] = tex1Dfetch(tex_accel, iglob) - (fac1*tempx1l + fac2*tempx2l + fac3*tempx3l);
+ d_accel[iglob + NGLOB] = tex1Dfetch(tex_accel, iglob + NGLOB) - (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l);
+ d_accel[iglob + 2*NGLOB] = tex1Dfetch(tex_accel, iglob + 2*NGLOB) - (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
+#else
+ /* OLD/To be implemented version that uses coloring to get around race condition. About 1.6x faster */
+ // d_accel[iglob*3] -= (fac1*tempx1l + fac2*tempx2l + fac3*tempx3l);
+ // d_accel[iglob*3 + 1] -= (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l);
+ // d_accel[iglob*3 + 2] -= (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
+
+ if(iglob*3+2 == 41153) {
+ // int ot = d_debug[5];
+ // d_debug[0+1+ot] = d_accel[iglob*3+2];
+ // // d_debug[1+1+ot] = fac1*tempz1l;
+ // // d_debug[2+1+ot] = fac2*tempz2l;
+ // // d_debug[3+1+ot] = fac3*tempz3l;
+ // d_debug[1+1+ot] = fac1;
+ // d_debug[2+1+ot] = fac2;
+ // d_debug[3+1+ot] = fac3;
+ // d_debug[4+1+ot] = d_accel[iglob*3+2]-(fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
+ // atomicAdd(&d_debug[0],1.0);
+ // d_debug[6+ot] = d_displ[iglob*3+2];
+ }
+
+ atomicAdd(&d_accel[iglob*3],-(fac1*tempx1l + fac2*tempx2l + fac3*tempx3l));
+ atomicAdd(&d_accel[iglob*3+1],-(fac1*tempy1l + fac2*tempy2l + fac3*tempy3l));
+ atomicAdd(&d_accel[iglob*3+2],-(fac1*tempz1l + fac2*tempz2l + fac3*tempz3l));
+
+#endif
+ }
+
+#else // of #ifndef MAKE_KERNEL2_BECOME_STUPID_FOR_TESTS
+ d_accel[iglob] -= 0.00000001f;
+ d_accel[iglob + NGLOB] -= 0.00000001f;
+ d_accel[iglob + 2*NGLOB] -= 0.00000001f;
+#endif // of #ifndef MAKE_KERNEL2_BECOME_STUPID_FOR_TESTS
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void kernel_3_cuda_device(real* veloc,
+ real* accel, int size,
+ real deltatover2, real* rmass);
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(kernel_3_cuda,
+ KERNEL_3_CUDA)(long* Mesh_pointer,
+ int* size_F,
+ float* deltatover2_F,
+ int* SIMULATION_TYPE_f,
+ float* b_deltatover2) {
+TRACE("kernel_3_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper
+ int size = *size_F;
+ int SIMULATION_TYPE = *SIMULATION_TYPE_f;
+ real deltatover2 = *deltatover2_F;
+ int blocksize=128;
+ int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
+ int num_blocks_x = size_padded/blocksize;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ kernel_3_cuda_device<<< grid, threads>>>(mp->d_veloc, mp->d_accel, size, deltatover2, mp->d_rmass);
+
+ if(SIMULATION_TYPE == 3) {
+ kernel_3_cuda_device<<< grid, threads>>>(mp->d_b_veloc, mp->d_b_accel, size, *b_deltatover2,mp->d_rmass);
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
+ exit_on_cuda_error("after kernel 3");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+ __global__ void kernel_3_cuda_device(real* veloc,
+ real* accel, int size,
+ real deltatover2, real* rmass) {
+ int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
+
+ /* because of block and grid sizing problems, there is a small */
+ /* amount of buffer at the end of the calculation */
+ if(id < size) {
+ accel[3*id] = accel[3*id]*rmass[id];
+ accel[3*id+1] = accel[3*id+1]*rmass[id];
+ accel[3*id+2] = accel[3*id+2]*rmass[id];
+
+ veloc[3*id] = veloc[3*id] + deltatover2*accel[3*id];
+ veloc[3*id+1] = veloc[3*id+1] + deltatover2*accel[3*id+1];
+ veloc[3*id+2] = veloc[3*id+2] + deltatover2*accel[3*id+2];
+ }
+ }
+
+/* ----------------------------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------------------------------- */
+
+/* note:
+ constant arrays when used in compute_forces_acoustic_cuda.cu routines stay zero,
+ constant declaration and cudaMemcpyToSymbol would have to be in the same file...
+
+ extern keyword doesn't work for __constant__ declarations.
+
+ also:
+ cudaMemcpyToSymbol("deviceCaseParams", caseParams, sizeof(CaseParams));
+ ..
+ and compile with -arch=sm_20
+
+ see also: http://stackoverflow.com/questions/4008031/how-to-use-cuda-constant-memory-in-a-programmer-pleasant-way
+ doesn't seem to work.
+
+ we could keep arrays separated for acoustic and elastic routines...
+
+ for now, we store pointers with cudaGetSymbolAddress() function calls.
+
+ */
+
+
+// constant arrays
+
+void setConst_hprime_xx(float* array,Mesh* mp)
+{
+
+ cudaError_t err = cudaMemcpyToSymbol(d_hprime_xx, array, NGLL2*sizeof(float));
+ if (err != cudaSuccess)
+ {
+ fprintf(stderr, "Error in setConst_hprime_xx: %s\n", cudaGetErrorString(err));
+ fprintf(stderr, "The problem is maybe -arch sm_13 instead of -arch sm_11 in the Makefile, please doublecheck\n");
+ exit(1);
+ }
+
+ err = cudaGetSymbolAddress((void**)&(mp->d_hprime_xx),"d_hprime_xx");
+ if(err != cudaSuccess) {
+ fprintf(stderr, "Error with d_hprime_xx: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+}
+
+void setConst_hprime_yy(float* array,Mesh* mp)
+{
+
+ cudaError_t err = cudaMemcpyToSymbol(d_hprime_yy, array, NGLL2*sizeof(float));
+ if (err != cudaSuccess)
+ {
+ fprintf(stderr, "Error in setConst_hprime_yy: %s\n", cudaGetErrorString(err));
+ fprintf(stderr, "The problem is maybe -arch sm_13 instead of -arch sm_11 in the Makefile, please doublecheck\n");
+ exit(1);
+ }
+
+ err = cudaGetSymbolAddress((void**)&(mp->d_hprime_yy),"d_hprime_yy");
+ if(err != cudaSuccess) {
+ fprintf(stderr, "Error with d_hprime_yy: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+}
+
+void setConst_hprime_zz(float* array,Mesh* mp)
+{
+
+ cudaError_t err = cudaMemcpyToSymbol(d_hprime_zz, array, NGLL2*sizeof(float));
+ if (err != cudaSuccess)
+ {
+ fprintf(stderr, "Error in setConst_hprime_zz: %s\n", cudaGetErrorString(err));
+ fprintf(stderr, "The problem is maybe -arch sm_13 instead of -arch sm_11 in the Makefile, please doublecheck\n");
+ exit(1);
+ }
+
+ err = cudaGetSymbolAddress((void**)&(mp->d_hprime_zz),"d_hprime_zz");
+ if(err != cudaSuccess) {
+ fprintf(stderr, "Error with d_hprime_zz: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+}
+
+
+void setConst_hprimewgll_xx(float* array,Mesh* mp)
+{
+ cudaError_t err = cudaMemcpyToSymbol(d_hprimewgll_xx, array, NGLL2*sizeof(float));
+ if (err != cudaSuccess)
+ {
+ fprintf(stderr, "Error in setConst_hprime_xx: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+
+ err = cudaGetSymbolAddress((void**)&(mp->d_hprimewgll_xx),"d_hprimewgll_xx");
+ if(err != cudaSuccess) {
+ fprintf(stderr, "Error with d_hprimewgll_xx: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+}
+
+void setConst_wgllwgll_xy(float* array,Mesh* mp)
+{
+ cudaError_t err = cudaMemcpyToSymbol(d_wgllwgll_xy, array, NGLL2*sizeof(float));
+ if (err != cudaSuccess)
+ {
+ fprintf(stderr, "Error in setConst_wgllwgll_xy: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+ //mp->d_wgllwgll_xy = d_wgllwgll_xy;
+ err = cudaGetSymbolAddress((void**)&(mp->d_wgllwgll_xy),"d_wgllwgll_xy");
+ if(err != cudaSuccess) {
+ fprintf(stderr, "Error with d_wgllwgll_xy: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+
+}
+
+void setConst_wgllwgll_xz(float* array,Mesh* mp)
+{
+ cudaError_t err = cudaMemcpyToSymbol(d_wgllwgll_xz, array, NGLL2*sizeof(float));
+ if (err != cudaSuccess)
+ {
+ fprintf(stderr, "Error in setConst_wgllwgll_xz: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+ //mp->d_wgllwgll_xz = d_wgllwgll_xz;
+ err = cudaGetSymbolAddress((void**)&(mp->d_wgllwgll_xz),"d_wgllwgll_xz");
+ if(err != cudaSuccess) {
+ fprintf(stderr, "Error with d_wgllwgll_xz: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+
+}
+
+void setConst_wgllwgll_yz(float* array,Mesh* mp)
+{
+ cudaError_t err = cudaMemcpyToSymbol(d_wgllwgll_yz, array, NGLL2*sizeof(float));
+ if (err != cudaSuccess)
+ {
+ fprintf(stderr, "Error in setConst_wgllwgll_yz: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+ //mp->d_wgllwgll_yz = d_wgllwgll_yz;
+ err = cudaGetSymbolAddress((void**)&(mp->d_wgllwgll_yz),"d_wgllwgll_yz");
+ if(err != cudaSuccess) {
+ fprintf(stderr, "Error with d_wgllwgll_yz: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+
+}
Copied: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_kernels_cuda.cu (from rev 19012, seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_kernels_cuda.cu)
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_kernels_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_kernels_cuda.cu 2011-10-06 03:31:24 UTC (rev 19027)
@@ -0,0 +1,521 @@
+#include <stdio.h>
+#include <cuda.h>
+#include <cublas.h>
+#include <mpi.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// ELASTIC SIMULATIONS
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void compute_kernels_cudakernel(int* ispec_is_elastic, int* ibool,
+ float* accel,
+ float* b_displ,
+ float* epsilondev_xx,
+ float* epsilondev_yy,
+ float* epsilondev_xy,
+ float* epsilondev_xz,
+ float* epsilondev_yz,
+ float* b_epsilondev_xx,
+ float* b_epsilondev_yy,
+ float* b_epsilondev_xy,
+ float* b_epsilondev_xz,
+ float* b_epsilondev_yz,
+ float* rho_kl,
+ float deltat,
+ float* mu_kl,
+ float* kappa_kl,
+ float* epsilon_trace_over_3,
+ float* b_epsilon_trace_over_3,
+ int NSPEC_AB,
+ float* d_debug) {
+
+ int ispec = blockIdx.x + blockIdx.y*gridDim.x;
+ if(ispec<NSPEC_AB) { // handles case when there is 1 extra block (due to rectangular grid)
+ int ijk = threadIdx.x;
+ int ijk_ispec = ijk + 125*ispec;
+ int iglob = ibool[ijk_ispec]-1;
+
+ // if(ispec_is_elastic[ispec]) { // leave out until have acoustic coupling
+ if(1) {
+
+
+ if(ijk_ispec == 9480531) {
+ d_debug[0] = rho_kl[ijk_ispec];
+ d_debug[1] = accel[3*iglob];
+ d_debug[2] = b_displ[3*iglob];
+ d_debug[3] = deltat * (accel[3*iglob]*b_displ[3*iglob]+
+ accel[3*iglob+1]*b_displ[3*iglob+1]+
+ accel[3*iglob+2]*b_displ[3*iglob+2]);
+ }
+
+ rho_kl[ijk_ispec] += deltat * (accel[3*iglob]*b_displ[3*iglob]+
+ accel[3*iglob+1]*b_displ[3*iglob+1]+
+ accel[3*iglob+2]*b_displ[3*iglob+2]);
+
+
+
+ // if(rho_kl[ijk_ispec] < 1.9983e+18) {
+ // atomicAdd(&d_debug[3],1.0);
+ // d_debug[4] = ijk_ispec;
+ // d_debug[0] = rho_kl[ijk_ispec];
+ // d_debug[1] = accel[3*iglob];
+ // d_debug[2] = b_displ[3*iglob];
+ // }
+
+ mu_kl[ijk_ispec] += deltat * (epsilondev_xx[ijk_ispec]*b_epsilondev_xx[ijk_ispec]+ // 1*b1
+ epsilondev_yy[ijk_ispec]*b_epsilondev_yy[ijk_ispec]+ // 2*b2
+ (epsilondev_xx[ijk_ispec]+epsilondev_yy[ijk_ispec])*
+ (b_epsilondev_xx[ijk_ispec]+b_epsilondev_yy[ijk_ispec])+
+ 2*(epsilondev_xy[ijk_ispec]*b_epsilondev_xy[ijk_ispec]+
+ epsilondev_xz[ijk_ispec]*b_epsilondev_xz[ijk_ispec]+
+ epsilondev_yz[ijk_ispec]*b_epsilondev_yz[ijk_ispec]));
+
+ kappa_kl[ijk_ispec] += deltat*(9*epsilon_trace_over_3[ijk_ispec]*
+ b_epsilon_trace_over_3[ijk_ispec]);
+
+ }
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(compute_kernels_cuda,
+ COMPUTE_KERNELS_CUDA)(long* Mesh_pointer, int* NOISE_TOMOGRAPHY,
+ int* ELASTIC_SIMULATION, int* SAVE_MOHO_MESH,float* deltat) {
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+
+ int blocksize = 125; // NGLLX*NGLLY*NGLLZ
+ int num_blocks_x = mp->NSPEC_AB;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ float* d_debug;
+ float* h_debug;
+ h_debug = (float*)calloc(128,sizeof(float));
+ cudaMalloc((void**)&d_debug,128*sizeof(float));
+ cudaMemcpy(d_debug,h_debug,128*sizeof(float),cudaMemcpyHostToDevice);
+
+
+ compute_kernels_cudakernel<<<grid,threads>>>(mp->d_ispec_is_elastic,mp->d_ibool,
+ mp->d_accel, mp->d_b_displ,
+ mp->d_epsilondev_xx,
+ mp->d_epsilondev_yy,
+ mp->d_epsilondev_xy,
+ mp->d_epsilondev_xz,
+ mp->d_epsilondev_yz,
+ mp->d_b_epsilondev_xx,
+ mp->d_b_epsilondev_yy,
+ mp->d_b_epsilondev_xy,
+ mp->d_b_epsilondev_xz,
+ mp->d_b_epsilondev_yz,
+ mp->d_rho_kl,
+ *deltat,
+ mp->d_mu_kl,
+ mp->d_kappa_kl,
+ mp->d_epsilon_trace_over_3,
+ mp->d_b_epsilon_trace_over_3,
+ mp->NSPEC_AB,
+ d_debug);
+
+ cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
+ cudaFree(d_debug);
+ // for(int i=0;i<5;i++) {
+ // printf("d_debug[%d]=%e\n",i,h_debug[i]);
+ // }
+ free(h_debug);
+
+ // float* h_rho = (float*)malloc(sizeof(float)*mp->NSPEC_AB*125);
+ // float maxval = 0;
+ // cudaMemcpy(h_rho,mp->d_rho_kl,sizeof(float)*mp->NSPEC_AB*125,cudaMemcpyDeviceToHost);
+ // int number_big_values = 0;
+ // for(int i=0;i<mp->NSPEC_AB*125;i++) {
+ // maxval = MAX(maxval,fabsf(h_rho[i]));
+ // if(fabsf(h_rho[i]) > 1e10) {
+ // number_big_values++;
+ // }
+ // }
+
+ // printf("maval rho = %e, number>1e10 = %d vs. %d\n",maxval,number_big_values,mp->NSPEC_AB*125);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("compute_kernels_cudakernel");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_sensitivity_kernels_to_host,
+ TRANSFER_SENSITIVITY_KERNELS_TO_HOST)(long* Mesh_pointer, float* h_rho_kl,
+ float* h_mu_kl, float* h_kappa_kl,
+ float* h_Sigma_kl,int* NSPEC_AB,int* NSPEC_AB_VAL) {
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(h_rho_kl,mp->d_rho_kl,*NSPEC_AB*125*sizeof(float),
+ cudaMemcpyDeviceToHost),1);
+ print_CUDA_error_if_any(cudaMemcpy(h_mu_kl,mp->d_mu_kl,*NSPEC_AB*125*sizeof(float),
+ cudaMemcpyDeviceToHost),1);
+ print_CUDA_error_if_any(cudaMemcpy(h_kappa_kl,mp->d_kappa_kl,*NSPEC_AB*125*sizeof(float),
+ cudaMemcpyDeviceToHost),1);
+ print_CUDA_error_if_any(cudaMemcpy(h_Sigma_kl,mp->d_Sigma_kl,125*(*NSPEC_AB_VAL)*sizeof(float),
+ cudaMemcpyHostToDevice),4);
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void compute_kernels_strength_noise_cuda_kernel(float* displ,
+ int* free_surface_ispec,
+ int* free_surface_ijk,
+ int* ibool,
+ float* noise_surface_movie,
+ float* normal_x_noise,
+ float* normal_y_noise,
+ float* normal_z_noise,
+ float* Sigma_kl,
+ float deltat,
+ int num_free_surface_faces,
+ float* d_debug) {
+ int iface = blockIdx.x + blockIdx.y*gridDim.x;
+ if(iface<num_free_surface_faces) {
+
+ int ispec = free_surface_ispec[iface]-1;
+ int igll = threadIdx.x;
+ int ipoin = igll + 25*iface;
+ int i = free_surface_ijk[INDEX3(3,25,0,igll,iface)]-1;
+ int j = free_surface_ijk[INDEX3(3,25,0,igll,iface)]-1;
+ int k = free_surface_ijk[INDEX3(3,25,0,igll,iface)]-1;
+
+ int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
+
+ float eta = (noise_surface_movie[INDEX3(3,25,0,igll,iface)]*normal_x_noise[ipoin]+
+ noise_surface_movie[INDEX3(3,25,1,igll,iface)]*normal_y_noise[ipoin]+
+ noise_surface_movie[INDEX3(3,25,2,igll,iface)]*normal_z_noise[ipoin]);
+
+ // if(ijk_ispec == 78496) {
+ // d_debug[0] = Sigma_kl[ijk_ispec];
+ // d_debug[1] = eta;
+ // d_debug[2] = normal_x_noise[ipoin];
+ // d_debug[3] = normal_y_noise[ipoin];
+ // d_debug[4] = normal_z_noise[ipoin];
+ // d_debug[5] = displ[3*iglob+2];
+ // d_debug[6] = deltat*eta*normal_z_noise[ipoin]*displ[2+3*iglob];
+ // d_debug[7] = 0.008*1.000000e-24*normal_z_noise[ipoin]*3.740546e-13;
+ // }
+
+ Sigma_kl[INDEX4(5,5,5,i,j,k,ispec)] += deltat*eta*(normal_x_noise[ipoin]*displ[3*iglob]+
+ normal_y_noise[ipoin]*displ[1+3*iglob]+
+ normal_z_noise[ipoin]*displ[2+3*iglob]);
+ }
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(compute_kernels_strength_noise_cuda,
+ COMPUTE_KERNELS_STRENGTH_NOISE_CUDA)(long* Mesh_pointer,
+ float* h_noise_surface_movie,
+ int* num_free_surface_faces_f,
+ float* deltat) {
+ Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+ int num_free_surface_faces = *num_free_surface_faces_f;
+
+ cudaMemcpy(mp->d_noise_surface_movie,h_noise_surface_movie,3*25*num_free_surface_faces*sizeof(float),cudaMemcpyHostToDevice);
+
+
+ int num_blocks_x = num_free_surface_faces;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(25,1,1);
+
+ // float* h_debug = (float*)calloc(128,sizeof(float));
+ float* d_debug;
+ // cudaMalloc((void**)&d_debug,128*sizeof(float));
+ // cudaMemcpy(d_debug,h_debug,128*sizeof(float),cudaMemcpyHostToDevice);
+
+ compute_kernels_strength_noise_cuda_kernel<<<grid,threads>>>(mp->d_displ,
+ mp->d_free_surface_ispec,
+ mp->d_free_surface_ijk,
+ mp->d_ibool,
+ mp->d_noise_surface_movie,
+ mp->d_normal_x_noise,
+ mp->d_normal_y_noise,
+ mp->d_normal_z_noise,
+ mp->d_Sigma_kl,*deltat,
+ num_free_surface_faces,
+ d_debug);
+
+ // cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
+ // for(int i=0;i<8;i++) {
+ // printf("debug[%d]= %e\n",i,h_debug[i]);
+ // }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("compute_kernels_strength_noise_cuda_kernel");
+#endif
+
+}
+
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// ACOUSTIC SIMULATIONS
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+__device__ void compute_gradient_kernel(int ijk,
+ int ispec,
+ float* scalar_field,
+ float* vector_field_element,
+ float* hprime_xx,
+ float* hprime_yy,
+ float* hprime_zz,
+ float* d_xix,
+ float* d_xiy,
+ float* d_xiz,
+ float* d_etax,
+ float* d_etay,
+ float* d_etaz,
+ float* d_gammax,
+ float* d_gammay,
+ float* d_gammaz,
+ float rhol) {
+
+ float temp1l,temp2l,temp3l;
+ float hp1,hp2,hp3;
+ float xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl;
+ float rho_invl;
+ int l,offset,offset1,offset2,offset3;
+
+ //const int NGLLX = 5;
+ const int NGLL3_ALIGN = 128;
+
+ int K = (ijk/NGLL2);
+ int J = ((ijk-K*NGLL2)/NGLLX);
+ int I = (ijk-K*NGLL2-J*NGLLX);
+
+ // derivative along x
+ temp1l = 0.f;
+ for( l=0; l<NGLLX;l++){
+ hp1 = hprime_xx[l*NGLLX+I];
+ offset1 = K*NGLL2+J*NGLLX+l;
+ temp1l += scalar_field[offset1]*hp1;
+ }
+
+ // derivative along y
+ temp2l = 0.f;
+ for( l=0; l<NGLLX;l++){
+ hp2 = hprime_yy[l*NGLLX+J];
+ offset2 = K*NGLL2+l*NGLLX+I;
+ temp2l += scalar_field[offset2]*hp2;
+ }
+
+ // derivative along z
+ temp3l = 0.f;
+ for( l=0; l<NGLLX;l++){
+ hp3 = hprime_zz[l*NGLLX+K];
+ offset3 = l*NGLL2+J*NGLLX+I;
+ temp3l += scalar_field[offset3]*hp3;
+
+ }
+
+ offset = ispec*NGLL3_ALIGN + ijk;
+
+ xixl = d_xix[offset];
+ xiyl = d_xiy[offset];
+ xizl = d_xiz[offset];
+ etaxl = d_etax[offset];
+ etayl = d_etay[offset];
+ etazl = d_etaz[offset];
+ gammaxl = d_gammax[offset];
+ gammayl = d_gammay[offset];
+ gammazl = d_gammaz[offset];
+
+ rho_invl = 1.0f / rhol;
+
+ // derivatives of acoustic scalar potential field on GLL points
+ vector_field_element[0] = (temp1l*xixl + temp2l*etaxl + temp3l*gammaxl) * rho_invl;
+ vector_field_element[1] = (temp1l*xiyl + temp2l*etayl + temp3l*gammayl) * rho_invl;
+ vector_field_element[2] = (temp1l*xizl + temp2l*etazl + temp3l*gammazl) * rho_invl;
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+__global__ void compute_kernels_acoustic_kernel(int* ispec_is_acoustic,
+ int* ibool,
+ float* rhostore,
+ float* kappastore,
+ float* hprime_xx,
+ float* hprime_yy,
+ float* hprime_zz,
+ float* d_xix,
+ float* d_xiy,
+ float* d_xiz,
+ float* d_etax,
+ float* d_etay,
+ float* d_etaz,
+ float* d_gammax,
+ float* d_gammay,
+ float* d_gammaz,
+ float* potential_dot_dot_acoustic,
+ float* b_potential_acoustic,
+ float* b_potential_dot_dot_acoustic,
+ float* rho_ac_kl,
+ float* kappa_ac_kl,
+ float deltat,
+ int NSPEC_AB) {
+
+ int ispec = blockIdx.x + blockIdx.y*gridDim.x;
+ int ijk = threadIdx.x;
+
+ // local and global indices
+ int ijk_ispec = ijk + 125*ispec;
+ int ijk_ispec_padded = ijk + 128*ispec;
+ int iglob = ibool[ijk_ispec]-1;
+
+ float accel_elm[3];
+ float b_displ_elm[3];
+ float rhol,kappal;
+
+ // shared memory between all threads within this block
+ __shared__ float scalar_field_displ[125];
+ __shared__ float scalar_field_accel[125];
+
+ if( ispec < NSPEC_AB ){
+ if( ispec_is_acoustic[ispec] == 1) {
+
+ // copy field values
+ scalar_field_displ[ijk] = b_potential_acoustic[iglob];
+ scalar_field_accel[ijk] = potential_dot_dot_acoustic[iglob];
+ __syncthreads();
+
+ // gets material parameter
+ rhol = rhostore[ijk_ispec_padded];
+
+ // displacement vector from backward field
+ compute_gradient_kernel(ijk,ispec,scalar_field_displ,b_displ_elm,
+ hprime_xx,hprime_yy,hprime_zz,
+ d_xix,d_xiy,d_xiz,d_etax,d_etay,d_etaz,d_gammax,d_gammay,d_gammaz,
+ rhol);
+
+ // acceleration vector
+ compute_gradient_kernel(ijk,ispec,scalar_field_accel,accel_elm,
+ hprime_xx,hprime_yy,hprime_zz,
+ d_xix,d_xiy,d_xiz,d_etax,d_etay,d_etaz,d_gammax,d_gammay,d_gammaz,
+ rhol);
+
+ // density kernel
+ rho_ac_kl[ijk_ispec] -= deltat * rhol * (accel_elm[0]*b_displ_elm[0] +
+ accel_elm[1]*b_displ_elm[1] +
+ accel_elm[2]*b_displ_elm[2]);
+
+ // bulk modulus kernel
+ kappal = kappastore[ijk_ispec];
+ kappa_ac_kl[ijk_ispec] -= deltat / kappal * potential_dot_dot_acoustic[iglob]
+ * b_potential_dot_dot_acoustic[iglob];
+ }
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+extern "C"
+void FC_FUNC_(compute_kernels_acoustic_cuda,
+ COMPUTE_KERNELS_ACOUSTIC_CUDA)(
+ long* Mesh_pointer,
+ float* deltat) {
+
+TRACE("compute_kernels_acoustic_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+
+ int blocksize = 125; // NGLLX*NGLLY*NGLLZ
+ int num_blocks_x = mp->NSPEC_AB;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ compute_kernels_acoustic_kernel<<<grid,threads>>>(mp->d_ispec_is_acoustic,
+ mp->d_ibool,
+ mp->d_rhostore,
+ mp->d_kappastore,
+ mp->d_hprime_xx,
+ mp->d_hprime_yy,
+ mp->d_hprime_zz,
+ mp->d_xix,
+ mp->d_xiy,
+ mp->d_xiz,
+ mp->d_etax,
+ mp->d_etay,
+ mp->d_etaz,
+ mp->d_gammax,
+ mp->d_gammay,
+ mp->d_gammaz,
+ mp->d_potential_dot_dot_acoustic,
+ mp->d_b_potential_acoustic,
+ mp->d_b_potential_dot_dot_acoustic,
+ mp->d_rho_ac_kl,
+ mp->d_kappa_ac_kl,
+ *deltat,
+ mp->NSPEC_AB);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("compute_kernels_acoustic_kernel");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_sensitivity_kernels_acoustic_to_host,
+ TRANSFER_SENSITIVITY_KERNELS_ACOUSTIC_TO_HOST)(long* Mesh_pointer,
+ float* h_rho_ac_kl,
+ float* h_kappa_ac_kl,
+ int* NSPEC_AB) {
+
+TRACE("transfer_sensitivity_kernels_acoustic_to_host");
+
+ //get mesh pointer out of fortran integer container
+ Mesh* mp = (Mesh*)(*Mesh_pointer);
+ int size = *NSPEC_AB*125;
+
+ // copies kernel values over to CPU host
+ print_CUDA_error_if_any(cudaMemcpy(h_rho_ac_kl,mp->d_rho_ac_kl,size*sizeof(float),
+ cudaMemcpyDeviceToHost),911);
+ print_CUDA_error_if_any(cudaMemcpy(h_kappa_ac_kl,mp->d_kappa_ac_kl,size*sizeof(float),
+ cudaMemcpyDeviceToHost),922);
+}
Added: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu 2011-10-06 03:31:24 UTC (rev 19027)
@@ -0,0 +1,153 @@
+#include <stdio.h>
+#include <cuda.h>
+#include <cublas.h>
+#include <mpi.h>
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void compute_stacey_acoustic_kernel(float* potential_dot_acoustic,
+ float* potential_dot_dot_acoustic,
+ int* abs_boundary_ispec,
+ int* abs_boundary_ijk,
+ int* ibool,
+ float* rhostore,
+ float* kappastore,
+ real* abs_boundary_jacobian2Dw,
+ int* ispec_is_inner,
+ int* ispec_is_acoustic,
+ int phase_is_inner,
+ int SIMULATION_TYPE, int SAVE_FORWARD,
+ float* b_potential_dot_acoustic,
+ float* b_potential_dot_dot_acoustic,
+ float* b_absorb_potential // ,float* debug_val,int* debug_val_int
+ ) {
+
+ int igll = threadIdx.x; // tx
+ int iface = blockIdx.x + gridDim.x*blockIdx.y; // bx
+
+
+ int i,j,k,iglob,ispec;
+ realw rhol,kappal,cpl;
+ realw jacobianw;
+
+ // don't compute points outside NGLLSQUARE==NGLL2==25
+ // way 2: no further check needed since blocksize = 25
+ // if(igll<NGLL2) {
+
+ // "-1" from index values to convert from Fortran-> C indexing
+ ispec = abs_boundary_ispec[iface]-1;
+
+ if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_acoustic[ispec]==1) {
+
+ i = abs_boundary_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)]-1;
+ j = abs_boundary_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)]-1;
+ k = abs_boundary_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)]-1;
+ iglob = ibool[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)]-1;
+
+ // determines bulk sound speed
+ rhol = rhostore[INDEX4_PADDED(NGLLX,NGLLX,NGLLX,i,j,k,ispec)];
+ kappal = kappastore[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)];
+ cpl = sqrt( kappal / rhol );
+
+ // gets associated, weighted jacobian
+ jacobianw = abs_boundary_jacobian2Dw[INDEX2(NGLL2,igll,iface)];
+
+//daniel
+//if( igll == 0 ) printf("gpu: %i %i %i %i %i %e %e %e\n",i,j,k,ispec,iglob,rhol,kappal,jacobianw);
+
+ // Sommerfeld condition
+ atomicAdd(&potential_dot_dot_acoustic[iglob],-potential_dot_acoustic[iglob]*jacobianw/cpl/rhol);
+
+ // adjoint simulations
+ if( SIMULATION_TYPE == 3 ){
+ // Sommerfeld condition
+ atomicAdd(&b_potential_dot_dot_acoustic[iglob],-b_absorb_potential[INDEX2(NGLL2,igll,iface)]);
+ }else if( SIMULATION_TYPE == 1 && SAVE_FORWARD == 1 ){
+ b_absorb_potential[INDEX2(NGLL2,igll,iface)] = potential_dot_acoustic[iglob]*jacobianw/cpl/rhol;
+ }
+
+ }
+// }
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(compute_stacey_acoustic_cuda,
+ COMPUTE_STACEY_ACOUSTIC_CUDA)(
+ long* Mesh_pointer_f,
+ int* phase_is_innerf,
+ int* num_abs_boundary_facesf,
+ int* SIMULATION_TYPEf,
+ int* SAVE_FORWARDf,
+ float* h_b_absorb_potential) {
+TRACE("compute_stacey_acoustic_cuda");
+ //double start_time = get_time();
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+ int phase_is_inner = *phase_is_innerf;
+ int num_abs_boundary_faces = *num_abs_boundary_facesf;
+ int SIMULATION_TYPE = *SIMULATION_TYPEf;
+ int SAVE_FORWARD = *SAVE_FORWARDf;
+
+ // way 1: Elapsed time: 4.385948e-03
+ // > NGLLSQUARE==NGLL2==25, but we handle this inside kernel
+ // int blocksize = 32;
+
+ // way 2: Elapsed time: 4.379034e-03
+ // > NGLLSQUARE==NGLL2==25, no further check inside kernel
+ int blocksize = 25;
+
+ int num_blocks_x = num_abs_boundary_faces;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ // adjoint simulations: reads in absorbing boundary
+ if (SIMULATION_TYPE == 3 && num_abs_boundary_faces > 0 ){
+ // copies array to GPU
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_potential,h_b_absorb_potential,
+ mp->d_b_reclen_potential,cudaMemcpyHostToDevice),700);
+ }
+
+ compute_stacey_acoustic_kernel<<<grid,threads>>>(mp->d_potential_dot_acoustic,
+ mp->d_potential_dot_dot_acoustic,
+ mp->d_abs_boundary_ispec,
+ mp->d_abs_boundary_ijk,
+ mp->d_ibool,
+ mp->d_rhostore,
+ mp->d_kappastore,
+ mp->d_abs_boundary_jacobian2Dw,
+ mp->d_ispec_is_inner,
+ mp->d_ispec_is_acoustic,
+ phase_is_inner,
+ SIMULATION_TYPE,SAVE_FORWARD,
+ mp->d_b_potential_dot_acoustic,
+ mp->d_b_potential_dot_dot_acoustic,
+ mp->d_b_absorb_potential);
+
+ // adjoint simulations: stores absorbed wavefield part
+ if (SIMULATION_TYPE == 1 && SAVE_FORWARD == 1 && num_abs_boundary_faces > 0 ){
+ // copies array to CPU
+ print_CUDA_error_if_any(cudaMemcpy(h_b_absorb_potential,mp->d_b_absorb_potential,
+ mp->d_b_reclen_potential,cudaMemcpyDeviceToHost),701);
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("compute_stacey_acoustic_kernel");
+#endif
+}
+
Copied: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu (from rev 19012, seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_stacey_elastic_cuda.cu)
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu 2011-10-06 03:31:24 UTC (rev 19027)
@@ -0,0 +1,193 @@
+#include <stdio.h>
+#include <cuda.h>
+#include <cublas.h>
+#include <mpi.h>
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void compute_stacey_elastic_kernel(real* veloc,
+ real* accel,
+ real* b_accel,
+ int* abs_boundary_ispec,
+ int* abs_boundary_ijk, int* ibool,
+ real* abs_boundary_normal,
+ real* rho_vp, real* rho_vs,
+ real* abs_boundary_jacobian2Dw,
+ real* b_absorb_field,
+ int* ispec_is_inner, int* ispec_is_elastic,
+ int phase_is_inner,float* debug_val,int* debug_val_int,
+ int num_abs_boundary_faces,
+ int SAVE_FORWARD,int SIMULATION_TYPE) {
+
+ int igll = threadIdx.x; // tx
+ int iface = blockIdx.x + gridDim.x*blockIdx.y; // bx
+ int i;
+ int j;
+ int k;
+ int iglob;
+ int ispec;
+ realw vx,vy,vz,vn;
+ realw nx,ny,nz;
+ realw rho_vp_temp,rho_vs_temp;
+ realw tx,ty,tz;
+ realw jacobianw;
+
+ // don't compute points outside NGLLSQUARE == NGLL2 == 25
+ if(igll < NGLL2 && iface < num_abs_boundary_faces) {
+
+ // "-1" from index values to convert from Fortran-> C indexing
+ ispec = abs_boundary_ispec[iface]-1;
+ i = abs_boundary_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)]-1;
+ j = abs_boundary_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)]-1;
+ k = abs_boundary_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)]-1;
+ iglob = ibool[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)]-1;
+
+ if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_elastic[ispec]==1) {
+
+ i = abs_boundary_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)]-1;
+ j = abs_boundary_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)]-1;
+ k = abs_boundary_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)]-1;
+ iglob = ibool[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)]-1;
+
+ // gets associated velocity
+
+ vx = veloc[iglob*3+0];
+ vy = veloc[iglob*3+1];
+ vz = veloc[iglob*3+2];
+
+ // gets associated normal
+ nx = abs_boundary_normal[INDEX3(NDIM,NGLL2,0,igll,iface)];
+ ny = abs_boundary_normal[INDEX3(NDIM,NGLL2,1,igll,iface)];
+ nz = abs_boundary_normal[INDEX3(NDIM,NGLL2,2,igll,iface)];
+
+ // // velocity component in normal direction (normal points out of element)
+ vn = vx*nx + vy*ny + vz*nz;
+ rho_vp_temp = rho_vp[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)];
+ rho_vs_temp = rho_vs[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)];
+ tx = rho_vp_temp*vn*nx + rho_vs_temp*(vx-vn*nx);
+ ty = rho_vp_temp*vn*ny + rho_vs_temp*(vy-vn*ny);
+ tz = rho_vp_temp*vn*nz + rho_vs_temp*(vz-vn*nz);
+
+ jacobianw = abs_boundary_jacobian2Dw[INDEX2(NGLL2,igll,iface)];
+
+ atomicAdd(&accel[iglob*3],-tx*jacobianw);
+ atomicAdd(&accel[iglob*3+1],-ty*jacobianw);
+ atomicAdd(&accel[iglob*3+2],-tz*jacobianw);
+
+ if(SIMULATION_TYPE == 3) {
+ atomicAdd(&b_accel[iglob*3 ],-b_absorb_field[0+3*(igll+25*(iface))]);
+ atomicAdd(&b_accel[iglob*3+1],-b_absorb_field[1+3*(igll+25*(iface))]);
+ atomicAdd(&b_accel[iglob*3+2],-b_absorb_field[2+3*(igll+25*(iface))]);
+ }
+ else if(SAVE_FORWARD && SIMULATION_TYPE == 1) {
+ b_absorb_field[0+3*(igll+25*(iface))] = tx*jacobianw;
+ b_absorb_field[1+3*(igll+25*(iface))] = ty*jacobianw;
+ b_absorb_field[2+3*(igll+25*(iface))] = tz*jacobianw;
+ }
+
+ }
+ }
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+extern "C"
+void FC_FUNC_(compute_stacey_elastic_cuda,
+ COMPUTE_STACEY_ELASTIC_CUDA)(long* Mesh_pointer_f,
+ int* NSPEC_ABf,
+ int* NGLOB_ABf,
+ int* phase_is_innerf,
+ int* num_abs_boundary_facesf,
+ int* SIMULATION_TYPEf,
+ int* NSTEPf,
+ int* NGLOB_ADJOINTf,
+ int* b_num_abs_boundary_facesf,
+ int* b_reclen_fieldf,
+ float* b_absorb_field,
+ int* SAVE_FORWARDf,
+ int* itf) {
+
+TRACE("compute_stacey_elastic_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+ //int fid = 0;
+ //int it = *itf;
+ //int NSPEC_AB = *NSPEC_ABf;
+ //int NGLOB_AB = *NGLOB_ABf;
+ int phase_is_inner = *phase_is_innerf;
+ int num_abs_boundary_faces = *num_abs_boundary_facesf;
+ int SIMULATION_TYPE = *SIMULATION_TYPEf;
+ //int NSTEP = *NSTEPf;
+ int myrank; MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
+ //int NGLOB_ADJOINT = *NGLOB_ADJOINTf;
+ //int b_num_abs_boundary_faces = *b_num_abs_boundary_facesf;
+ int b_reclen_field = *b_reclen_fieldf;
+ int SAVE_FORWARD = *SAVE_FORWARDf;
+
+ int blocksize = 32; // > NGLL2=25, but we handle this inside kernel
+ int num_blocks_x = num_abs_boundary_faces;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ float* d_debug_val;
+ int* d_debug_val_int;
+
+ if(SIMULATION_TYPE == 3 && num_abs_boundary_faces > 0) {
+ // int val = NSTEP-it+1;
+ // read_abs_(&fid,(char*)b_absorb_field,&b_reclen_field,&val);
+ // The read is done in fortran
+ cudaMemcpy(mp->d_b_absorb_field,b_absorb_field,b_reclen_field,cudaMemcpyHostToDevice);
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("between cudamemcpy and compute_stacey_elastic_kernel");
+#endif
+
+ compute_stacey_elastic_kernel<<<grid,threads>>>(mp->d_veloc,mp->d_accel,mp->d_b_accel,
+ mp->d_abs_boundary_ispec, mp->d_abs_boundary_ijk,
+ mp->d_ibool,
+ mp->d_abs_boundary_normal,
+ mp->d_rho_vp, mp->d_rho_vs,
+ mp->d_abs_boundary_jacobian2Dw,
+ mp->d_b_absorb_field,
+ mp->d_ispec_is_inner,
+ mp->d_ispec_is_elastic,
+ phase_is_inner,
+ d_debug_val,d_debug_val_int,
+ num_abs_boundary_faces,
+ SAVE_FORWARD,SIMULATION_TYPE);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("compute_stacey_elastic_kernel");
+#endif
+
+ // ! adjoint simulations: stores absorbed wavefield part
+ // if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. num_abs_boundary_faces > 0 ) &
+ // write(IOABS,rec=it) b_reclen_field,b_absorb_field,b_reclen_field
+
+ if(SIMULATION_TYPE==1 && SAVE_FORWARD && num_abs_boundary_faces>0) {
+ cudaMemcpy(b_absorb_field,mp->d_b_absorb_field,b_reclen_field,cudaMemcpyDeviceToHost);
+ // The write is done in fortran
+ // write_abs_(&fid,(char*)b_absorb_field,&b_reclen_field,&it);
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("after compute_stacey_elastic after cudamemcpy");
+#endif
+}
+
Copied: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu (from rev 19012, seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/it_update_displacement_cuda.cu)
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu 2011-10-06 03:31:24 UTC (rev 19027)
@@ -0,0 +1,193 @@
+#include <stdio.h>
+#include <cuda.h>
+#include <cublas.h>
+#include <mpi.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+
+
+#define CUBLAS_ERROR(s,n) if (s != CUBLAS_STATUS_SUCCESS) { \
+fprintf (stderr, "CUBLAS Memory Write Error @ %d\n",n); \
+exit(EXIT_FAILURE); }
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// elastic wavefield
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+__global__ void UpdateDispVeloc_kernel(real* displ, real* veloc,
+ real* accel, int size,
+ real deltat, real deltatsqover2, real deltatover2) {
+ int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
+
+ /* because of block and grid sizing problems, there is a small */
+ /* amount of buffer at the end of the calculation */
+ if(id < size) {
+ displ[id] = displ[id] + deltat*veloc[id] + deltatsqover2*accel[id];
+ veloc[id] = veloc[id] + deltatover2*accel[id];
+ accel[id] = 0; // can do this using memset...not sure if faster
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(it_update_displacement_scheme_cuda,
+ IT_UPDATE_DISPLACMENT_SCHEME_CUDA)(long* Mesh_pointer_f,
+ int* size_F,
+ float* deltat_F,
+ float* deltatsqover2_F,
+ float* deltatover2_F,
+ int* SIMULATION_TYPE,
+ float* b_deltat_F,
+ float* b_deltatsqover2_F,
+ float* b_deltatover2_F) {
+
+TRACE("it_update_displacement_scheme_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
+
+ //int i,device;
+
+ int size = *size_F;
+ real deltat = *deltat_F;
+ real deltatsqover2 = *deltatsqover2_F;
+ real deltatover2 = *deltatover2_F;
+ real b_deltat = *b_deltat_F;
+ real b_deltatsqover2 = *b_deltatsqover2_F;
+ real b_deltatover2 = *b_deltatover2_F;
+ //cublasStatus status;
+
+ int blocksize = 128;
+ int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
+
+ int num_blocks_x = size_padded/blocksize;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+
+ exit_on_cuda_error("Before UpdateDispVeloc_kernel");
+
+ //launch kernel
+ UpdateDispVeloc_kernel<<<grid,threads>>>(mp->d_displ,mp->d_veloc,mp->d_accel,
+ size,deltat,deltatsqover2,deltatover2);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
+ // sync and check to catch errors from previous async operations
+ exit_on_cuda_error("UpdateDispVeloc_kernel");
+#endif
+
+
+ // kernel for backward fields
+ if(*SIMULATION_TYPE == 3) {
+
+ UpdateDispVeloc_kernel<<<grid,threads>>>(mp->d_b_displ,mp->d_b_veloc,mp->d_b_accel,
+ size,b_deltat, b_deltatsqover2, b_deltatover2);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
+ exit_on_cuda_error("after SIM_TYPE==3 UpdateDispVeloc_kernel");
+#endif
+ }
+
+ cudaThreadSynchronize();
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// acoustic wavefield
+
+// KERNEL 1
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void UpdatePotential_kernel(real* potential_acoustic,
+ real* potential_dot_acoustic,
+ real* potential_dot_dot_acoustic,
+ int size,
+ real deltat,
+ real deltatsqover2,
+ real deltatover2) {
+ int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
+
+ /* because of block and grid sizing problems, there is a small */
+ /* amount of buffer at the end of the calculation */
+ if(id < size) {
+ potential_acoustic[id] = potential_acoustic[id]
+ + deltat*potential_dot_acoustic[id]
+ + deltatsqover2*potential_dot_dot_acoustic[id];
+
+ potential_dot_acoustic[id] = potential_dot_acoustic[id]
+ + deltatover2*potential_dot_dot_acoustic[id];
+
+ potential_dot_dot_acoustic[id] = 0;
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(it_update_displacement_scheme_acoustic_cuda,
+ IT_UPDATE_DISPLACEMENT_SCHEME_ACOUSTIC_CUDA)(long* Mesh_pointer_f,
+ int* size_F,
+ float* deltat_F,
+ float* deltatsqover2_F,
+ float* deltatover2_F,
+ int* SIMULATION_TYPE,
+ float* b_deltat_F,
+ float* b_deltatsqover2_F,
+ float* b_deltatover2_F) {
+TRACE("it_update_displacement_scheme_acoustic_cuda");
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
+
+ //int i,device;
+ int size = *size_F;
+ real deltat = *deltat_F;
+ real deltatsqover2 = *deltatsqover2_F;
+ real deltatover2 = *deltatover2_F;
+ real b_deltat = *b_deltat_F;
+ real b_deltatsqover2 = *b_deltatsqover2_F;
+ real b_deltatover2 = *b_deltatover2_F;
+ //cublasStatus status;
+
+ int blocksize = 128;
+ int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
+
+ int num_blocks_x = size_padded/blocksize;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ //launch kernel
+ UpdatePotential_kernel<<<grid,threads>>>(mp->d_potential_acoustic,
+ mp->d_potential_dot_acoustic,
+ mp->d_potential_dot_dot_acoustic,
+ size,deltat,deltatsqover2,deltatover2);
+
+ if(*SIMULATION_TYPE == 3) {
+ UpdatePotential_kernel<<<grid,threads>>>(mp->d_b_potential_acoustic,
+ mp->d_b_potential_dot_acoustic,
+ mp->d_b_potential_dot_dot_acoustic,
+ size,b_deltat,b_deltatsqover2,b_deltatover2);
+ }
+
+ cudaThreadSynchronize();
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
+ exit_on_cuda_error("it_update_displacement_scheme_acoustic_cuda");
+#endif
+}
Copied: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/mesh_constants_cuda.h (from rev 19012, seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/mesh_constants_cuda.h)
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/mesh_constants_cuda.h (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/mesh_constants_cuda.h 2011-10-06 03:31:24 UTC (rev 19027)
@@ -0,0 +1,221 @@
+#ifndef GPU_MESH_
+#define GPU_MESH_
+#include <sys/types.h>
+#include <unistd.h>
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// for debugging and benchmarking
+
+/* ----------------------------------------------------------------------------------------------- */
+
+#define DEBUG 0
+#if DEBUG == 1
+#define TRACE(x) printf("%s\n",x)
+#else
+#define TRACE(x) // printf("%s\n",x);
+#endif
+
+#define MAXDEBUG 0
+#if MAXDEBUG == 1
+#define LOG(x) printf("%s\n",x)
+#define PRINT5(var,offset) for(;print_count<5;print_count++) printf("var(%d)=%2.20f\n",print_count,var[offset+print_count]);
+#define PRINT10(var) if(print_count<10) { printf("var=%1.20e\n",var); print_count++; }
+#define PRINT10i(var) if(print_count<10) { printf("var=%d\n",var); print_count++; }
+#else
+#define LOG(x) // printf("%s\n",x);
+#define PRINT5(var,offset) // for(i=0;i<10;i++) printf("var(%d)=%f\n",i,var[offset+i]);
+#endif
+
+#define ENABLE_VERY_SLOW_ERROR_CHECKING
+
+#define INDEX2(xsize,x,y) x + (y)*xsize
+#define INDEX3(xsize,ysize,x,y,z) x + (y)*xsize + (z)*xsize*ysize
+#define INDEX4(xsize,ysize,zsize,x,y,z,i) x + (y)*xsize + (z)*xsize*ysize + (i)*xsize*ysize*zsize
+#define INDEX5(xsize,ysize,zsize,isize,x,y,z,i,j) x + (y)*xsize + (z)*xsize*ysize + (i)*xsize*ysize*zsize + (j)*xsize*ysize*zsize*isize
+#define INDEX6(xsize,ysize,zsize,isize,jsize,x,y,z,i,j,k) x + xsize*(y + ysize*(z + zsize*(i + isize*(j + jsize*k))))
+
+#define INDEX4_PADDED(xsize,ysize,zsize,x,y,z,i) x + (y)*xsize + (z)*xsize*ysize + (i)*128
+
+//daniel: check speed of alternatives
+//#define INDEX2(xsize,x,y) x + (y)*xsize
+//#define INDEX3(xsize,ysize,x,y,z) x + xsize*(y + ysize*z)
+//#define INDEX4(xsize,ysize,zsize,x,y,z,i) x + xsize*(y + ysize*(z + zsize*i))
+//#define INDEX5(xsize,ysize,zsize,isize,x,y,z,i,j) x + xsize*(y + ysize*(z + zsize*(i + isize*j)))
+
+#define MAX(x,y) (((x) < (y)) ? (y) : (x))
+
+double get_time();
+
+void print_CUDA_error_if_any(cudaError_t err, int num);
+
+void pause_for_debugger(int pause);
+
+void exit_on_cuda_error(char* kernel_name);
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// cuda constant arrays
+
+/* ----------------------------------------------------------------------------------------------- */
+
+#define NDIM 3
+#define NGLLX 5
+#define NGLL2 25
+
+typedef float real; // type of variables passed into function
+typedef float realw; // type of "working" variables
+
+// double precision temporary variables leads to 10% performance
+// decrease in Kernel_2_impl (not very much..)
+typedef float reald;
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// mesh pointer wrapper structure
+
+/* ----------------------------------------------------------------------------------------------- */
+
+typedef struct mesh_ {
+
+ // mesh resolution
+ int NSPEC_AB;
+ int NGLOB_AB;
+
+ // interpolators
+ float* d_xix; float* d_xiy; float* d_xiz;
+ float* d_etax; float* d_etay; float* d_etaz;
+ float* d_gammax; float* d_gammay; float* d_gammaz;
+
+ // model parameters
+ float* d_kappav; float* d_muv;
+
+ // global indexing
+ int* d_ibool;
+
+ // pointers to constant memory arrays
+ float* d_hprime_xx; float* d_hprime_yy; float* d_hprime_zz;
+ float* d_hprimewgll_xx;
+ float* d_wgllwgll_xy;
+ float* d_wgllwgll_xz;
+ float* d_wgllwgll_yz;
+
+ // ------------------------------------------------------------------ //
+ // elastic wavefield parameters
+ // ------------------------------------------------------------------ //
+
+ // displacement, velocity, acceleration
+ float* d_displ; float* d_veloc; float* d_accel;
+ // backward/reconstructed elastic wavefield
+ float* d_b_displ; float* d_b_veloc; float* d_b_accel;
+
+ // elastic domain parameters
+ int* d_phase_ispec_inner_elastic;
+ int d_num_phase_ispec_elastic;
+ float* d_rmass;
+ float* d_send_accel_buffer;
+
+ // interfaces
+ int* d_nibool_interfaces_ext_mesh;
+ int* d_ibool_interfaces_ext_mesh;
+
+ //used for absorbing stacey boundaries
+ int* d_abs_boundary_ispec;
+ int* d_abs_boundary_ijk;
+ float* d_abs_boundary_normal;
+ float* d_rho_vp;
+ float* d_rho_vs;
+ float* d_abs_boundary_jacobian2Dw;
+ float* d_b_absorb_field;
+ int b_num_abs_boundary_faces;
+
+ // inner / outer elements
+ int* d_ispec_is_inner;
+ int* d_ispec_is_elastic;
+
+ // sources
+ float* d_sourcearrays;
+ double* d_stf_pre_compute;
+ int* d_islice_selected_source;
+ int* d_ispec_selected_source;
+
+ // receivers
+ int* d_number_receiver_global;
+ int* d_ispec_selected_rec;
+ int* d_islice_selected_rec;
+ int nrec_local;
+ float* d_station_seismo_field;
+ float* h_station_seismo_field;
+
+ // surface elements (to save for noise tomography and acoustic simulations)
+ int* d_free_surface_ispec;
+ int* d_free_surface_ijk;
+ int num_free_surface_faces;
+
+ // surface movie elements to save for noise tomography
+ float* d_noise_surface_movie;
+
+ float* d_epsilondev_xx;
+ float* d_epsilondev_yy;
+ float* d_epsilondev_xy;
+ float* d_epsilondev_xz;
+ float* d_epsilondev_yz;
+ float* d_epsilon_trace_over_3;
+
+ float* d_normal_x_noise;
+ float* d_normal_y_noise;
+ float* d_normal_z_noise;
+ float* d_mask_noise;
+ float* d_free_surface_jacobian2Dw;
+
+ float* d_noise_sourcearray;
+
+ float* d_b_epsilondev_xx;
+ float* d_b_epsilondev_yy;
+ float* d_b_epsilondev_xy;
+ float* d_b_epsilondev_xz;
+ float* d_b_epsilondev_yz;
+ float* d_b_epsilon_trace_over_3;
+
+ // sensitivity kernels
+ float* d_rho_kl;
+ float* d_mu_kl;
+ float* d_kappa_kl;
+ float* d_Sigma_kl;
+
+ // ------------------------------------------------------------------ //
+ // acoustic wavefield
+ // ------------------------------------------------------------------ //
+ // potential and first and second time derivative
+ float* d_potential_acoustic; float* d_potential_dot_acoustic; float* d_potential_dot_dot_acoustic;
+ // backward/reconstructed wavefield
+ float* d_b_potential_acoustic; float* d_b_potential_dot_acoustic; float* d_b_potential_dot_dot_acoustic;
+
+ // acoustic domain parameters
+ int* d_phase_ispec_inner_acoustic;
+ int num_phase_ispec_acoustic;
+
+ float* d_rhostore;
+ float* d_kappastore;
+ float* d_rmass_acoustic;
+
+ float* d_send_potential_dot_dot_buffer;
+ int* d_ispec_is_acoustic;
+
+ float* d_b_absorb_potential;
+ int d_b_reclen_potential;
+
+ // for writing seismograms
+ float* d_station_seismo_potential;
+ float* h_station_seismo_potential;
+
+ // sensitivity kernels
+ float* d_rho_ac_kl;
+ float* d_kappa_ac_kl;
+
+
+} Mesh;
+
+
+#endif
Copied: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/noise_tomography_cuda.cu (from rev 19012, seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/noise_tomography_cuda.cu)
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/noise_tomography_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/noise_tomography_cuda.cu 2011-10-06 03:31:24 UTC (rev 19027)
@@ -0,0 +1,255 @@
+#include <stdio.h>
+#include <cuda.h>
+#include <cublas.h>
+#include <mpi.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+// #include "epik_user.h"
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void transfer_surface_to_host_kernel(int* free_surface_ispec,int* free_surface_ijk, int num_free_surface_faces, int* ibool, real* displ, real* noise_surface_movie) {
+ int igll = threadIdx.x;
+ int iface = blockIdx.x + blockIdx.y*gridDim.x;
+
+ // int id = tx + blockIdx.x*blockDim.x + blockIdx.y*blockDim.x*gridDim.x;
+
+ if(iface < num_free_surface_faces) {
+ int ispec = free_surface_ispec[iface]-1; //-1 for C-based indexing
+
+ int i = free_surface_ijk[0+3*(igll + 25*(iface))]-1;
+ int j = free_surface_ijk[1+3*(igll + 25*(iface))]-1;
+ int k = free_surface_ijk[2+3*(igll + 25*(iface))]-1;
+
+ int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
+
+ noise_surface_movie[INDEX3(3,25,0,igll,iface)] = displ[iglob*3];
+ noise_surface_movie[INDEX3(3,25,1,igll,iface)] = displ[iglob*3+1];
+ noise_surface_movie[INDEX3(3,25,2,igll,iface)] = displ[iglob*3+2];
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(fortranflush,FORTRANFLUSH)(int* rank){
+TRACE("fortranflush");
+
+ fflush(stdout);
+ fflush(stderr);
+ printf("Flushing proc %d!\n",*rank);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(fortranprint,FORTRANPRINT)(int* id) {
+TRACE("fortranprint");
+
+ int procid;
+ MPI_Comm_rank(MPI_COMM_WORLD,&procid);
+ printf("%d: sends msg_id %d\n",procid,*id);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(fortranprintf,FORTRANPRINTF)(float* val) {
+TRACE("fortranprintf");
+
+ int procid;
+ MPI_Comm_rank(MPI_COMM_WORLD,&procid);
+ printf("%d: sends val %e\n",procid,*val);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(fortranprintd,FORTRANPRINTD)(double* val) {
+TRACE("fortranprintd");
+
+ int procid;
+ MPI_Comm_rank(MPI_COMM_WORLD,&procid);
+ printf("%d: sends val %e\n",procid,*val);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// randomize displ for testing
+extern "C"
+void FC_FUNC_(make_displ_rand,MAKE_DISPL_RAND)(long* Mesh_pointer_f,float* h_displ) {
+TRACE("make_displ_rand");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
+ // float* displ_rnd = (float*)malloc(mp->NGLOB_AB*3*sizeof(float));
+ for(int i=0;i<mp->NGLOB_AB*3;i++) {
+ h_displ[i] = rand();
+ }
+ cudaMemcpy(mp->d_displ,h_displ,mp->NGLOB_AB*3*sizeof(float),cudaMemcpyHostToDevice);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_surface_to_host,
+ TRANSFER_SURFACE_TO_HOST)(long* Mesh_pointer_f,real* h_noise_surface_movie,int* num_free_surface_faces) {
+TRACE("transfer_surface_to_host");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
+ int num_blocks_x = *num_free_surface_faces;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+ dim3 grid(num_blocks_x,num_blocks_y,1);
+ dim3 threads(25,1,1);
+
+ transfer_surface_to_host_kernel<<<grid,threads>>>(mp->d_free_surface_ispec,mp->d_free_surface_ijk, *num_free_surface_faces, mp->d_ibool, mp->d_displ, mp->d_noise_surface_movie);
+
+ cudaMemcpy(h_noise_surface_movie,mp->d_noise_surface_movie,3*25*(*num_free_surface_faces)*sizeof(real),cudaMemcpyDeviceToHost);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("transfer_surface_to_host");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void noise_read_add_surface_movie_cuda_kernel(real* accel, int* ibool, int* free_surface_ispec,int* free_surface_ijk, int num_free_surface_faces, real* noise_surface_movie, real* normal_x_noise, real* normal_y_noise, real* normal_z_noise, real* mask_noise, real* free_surface_jacobian2Dw, real* wgllwgll_xy,float* d_debug) {
+
+ int iface = blockIdx.x + gridDim.x*blockIdx.y; // surface element id
+
+ // when nspec_top > 65535, but mod(nspec_top,2) > 0, we end up with an extra block.
+ if(iface < num_free_surface_faces) {
+ int ispec = free_surface_ispec[iface]-1;
+
+ int igll = threadIdx.x;
+
+ int ipoin = 25*iface + igll;
+ int i=free_surface_ijk[0+3*(igll + 25*(iface))]-1;
+ int j=free_surface_ijk[1+3*(igll + 25*(iface))]-1;
+ int k=free_surface_ijk[2+3*(igll + 25*(iface))]-1;
+
+ int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
+
+ real normal_x = normal_x_noise[ipoin];
+ real normal_y = normal_y_noise[ipoin];
+ real normal_z = normal_z_noise[ipoin];
+
+ real eta = (noise_surface_movie[INDEX3(3,25,0,igll,iface)]*normal_x +
+ noise_surface_movie[INDEX3(3,25,1,igll,iface)]*normal_y +
+ noise_surface_movie[INDEX3(3,25,2,igll,iface)]*normal_z);
+
+ // error from cuda-memcheck and ddt seems "incorrect", because we
+ // are passing a __constant__ variable pointer around like it was
+ // made using cudaMalloc, which *may* be "incorrect", but produces
+ // correct results.
+
+ // ========= Invalid __global__ read of size
+ // 4 ========= at 0x00000cd8 in
+ // compute_add_sources_cuda.cu:260:noise_read_add_surface_movie_cuda_kernel
+ // ========= by thread (0,0,0) in block (3443,0) ========= Address
+ // 0x203000c8 is out of bounds
+
+ // non atomic version for speed testing -- atomic updates are needed for correctness
+ // accel[3*iglob] += eta*mask_noise[ipoin] * normal_x * wgllwgll_xy[tx] * free_surface_jacobian2Dw[tx + 25*ispec2D];
+ // accel[3*iglob+1] += eta*mask_noise[ipoin] * normal_y * wgllwgll_xy[tx] * free_surface_jacobian2Dw[tx + 25*ispec2D];
+ // accel[3*iglob+2] += eta*mask_noise[ipoin] * normal_z * wgllwgll_xy[tx] * free_surface_jacobian2Dw[tx + 25*ispec2D];
+
+ // Fortran version in SVN -- note deletion of wgllwgll_xy?
+ // accel(1,iglob) = accel(1,iglob) + eta * mask_noise(ipoin) * normal_x_noise(ipoin) &
+ // * free_surface_jacobian2Dw(igll,iface)
+ // accel(2,iglob) = accel(2,iglob) + eta * mask_noise(ipoin) * normal_y_noise(ipoin) &
+ // * free_surface_jacobian2Dw(igll,iface)
+ // accel(3,iglob) = accel(3,iglob) + eta * mask_noise(ipoin) * normal_z_noise(ipoin) &
+ // * free_surface_jacobian2Dw(igll,iface) ! wgllwgll_xy(i,j) * jacobian2D_top(i,j,iface)
+
+ // atomicAdd(&accel[iglob*3] ,eta*mask_noise[ipoin]*normal_x*wgllwgll_xy[tx]*free_surface_jacobian2Dw[igll+25*iface]);
+ // atomicAdd(&accel[iglob*3+1],eta*mask_noise[ipoin]*normal_y*wgllwgll_xy[tx]*free_surface_jacobian2Dw[igll+25*iface]);
+ // atomicAdd(&accel[iglob*3+2],eta*mask_noise[ipoin]*normal_z*wgllwgll_xy[tx]*free_surface_jacobian2Dw[igll+25*iface]);
+
+ atomicAdd(&accel[iglob*3] ,eta*mask_noise[ipoin]*normal_x*free_surface_jacobian2Dw[igll+25*iface]);
+ atomicAdd(&accel[iglob*3+1],eta*mask_noise[ipoin]*normal_y*free_surface_jacobian2Dw[igll+25*iface]);
+ atomicAdd(&accel[iglob*3+2],eta*mask_noise[ipoin]*normal_z*free_surface_jacobian2Dw[igll+25*iface]);
+
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(noise_read_add_surface_movie_cuda,
+ NOISE_READ_ADD_SURFACE_MOVIE_CUDA)(long* Mesh_pointer_f, real* h_noise_surface_movie, int* num_free_surface_faces_f,int* NOISE_TOMOGRAPHYf) {
+TRACE("noise_read_add_surface_movie_cuda");
+
+ // EPIK_TRACER("noise_read_add_surface_movie_cuda");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+ int num_free_surface_faces = *num_free_surface_faces_f;
+ int NOISE_TOMOGRAPHY = *NOISE_TOMOGRAPHYf;
+ float* d_noise_surface_movie;
+ cudaMalloc((void**)&d_noise_surface_movie,3*25*num_free_surface_faces*sizeof(float));
+ cudaMemcpy(d_noise_surface_movie, h_noise_surface_movie,3*25*num_free_surface_faces*sizeof(real),cudaMemcpyHostToDevice);
+
+ int num_blocks_x = num_free_surface_faces;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+ dim3 grid(num_blocks_x,num_blocks_y,1);
+ dim3 threads(25,1,1);
+
+ // float* h_debug = (float*)calloc(128,sizeof(float));
+ float* d_debug;
+ // cudaMalloc((void**)&d_debug,128*sizeof(float));
+ // cudaMemcpy(d_debug,h_debug,128*sizeof(float),cudaMemcpyHostToDevice);
+
+ if(NOISE_TOMOGRAPHY == 2) { // add surface source to forward field
+ noise_read_add_surface_movie_cuda_kernel<<<grid,threads>>>(mp->d_accel,
+ mp->d_ibool,
+ mp->d_free_surface_ispec,
+ mp->d_free_surface_ijk,
+ num_free_surface_faces,
+ d_noise_surface_movie,
+ mp->d_normal_x_noise,
+ mp->d_normal_y_noise,
+ mp->d_normal_z_noise,
+ mp->d_mask_noise,
+ mp->d_free_surface_jacobian2Dw,
+ mp->d_wgllwgll_xy,
+ d_debug);
+ }
+ else if(NOISE_TOMOGRAPHY==3) { // add surface source to adjoint (backward) field
+ noise_read_add_surface_movie_cuda_kernel<<<grid,threads>>>(mp->d_b_accel,
+ mp->d_ibool,
+ mp->d_free_surface_ispec,
+ mp->d_free_surface_ijk,
+ num_free_surface_faces,
+ d_noise_surface_movie,
+ mp->d_normal_x_noise,
+ mp->d_normal_y_noise,
+ mp->d_normal_z_noise,
+ mp->d_mask_noise,
+ mp->d_free_surface_jacobian2Dw,
+ mp->d_wgllwgll_xy,
+ d_debug);
+ }
+
+
+ // cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
+ // for(int i=0;i<8;i++) {
+ // printf("debug[%d]= %e\n",i,h_debug[i]);
+ // }
+ // MPI_Abort(MPI_COMM_WORLD,1);
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("noise_read_add_surface_movie_cuda_kernel");
+#endif
+
+ cudaFree(d_noise_surface_movie);
+}
Copied: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_constants_cuda.h (from rev 19012, seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_constants_cuda.h)
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_constants_cuda.h (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_constants_cuda.h 2011-10-06 03:31:24 UTC (rev 19027)
@@ -0,0 +1,98 @@
+#ifndef CUDA_HEADER_H
+#define CUDA_HEADER_H
+/* CUDA specific things from specfem3D_kernels.cu */
+
+//#define NGLL2 25
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+#ifdef USE_TEXTURES
+// declaration of textures
+texture<float, 1, cudaReadModeElementType> tex_displ;
+texture<float, 1, cudaReadModeElementType> tex_accel;
+
+texture<float, 1, cudaReadModeElementType> tex_potential_acoustic;
+texture<float, 1, cudaReadModeElementType> tex_potential_dot_dot_acoustic;
+
+// for binding the textures
+
+ void bindTexturesDispl(float* d_displ)
+ {
+ cudaError_t err;
+
+ cudaChannelFormatDesc channelDescFloat = cudaCreateChannelDesc<float>();
+
+ err = cudaBindTexture(NULL,tex_displ, d_displ, channelDescFloat, NDIM*NGLOB*sizeof(float));
+ if (err != cudaSuccess)
+ {
+ fprintf(stderr, "Error in bindTexturesDispl for displ: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+ }
+
+ void bindTexturesAccel(float* d_accel)
+ {
+ cudaError_t err;
+
+ cudaChannelFormatDesc channelDescFloat = cudaCreateChannelDesc<float>();
+
+ err = cudaBindTexture(NULL,tex_accel, d_accel, channelDescFloat, NDIM*NGLOB*sizeof(float));
+ if (err != cudaSuccess)
+ {
+ fprintf(stderr, "Error in bindTexturesAccel for accel: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+ }
+
+ void bindTexturesPotential(float* d_potential_acoustic)
+ {
+ cudaError_t err;
+
+ cudaChannelFormatDesc channelDescFloat = cudaCreateChannelDesc<float>();
+
+ err = cudaBindTexture(NULL,tex_potential_acoustic, d_potential_acoustic,
+ channelDescFloat, NGLOB*sizeof(float));
+ if (err != cudaSuccess)
+ {
+ fprintf(stderr, "Error in bindTexturesPotential for potential_acoustic: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+ }
+
+ void bindTexturesPotential_dot_dot(float* d_potential_dot_dot_acoustic)
+ {
+ cudaError_t err;
+
+ cudaChannelFormatDesc channelDescFloat = cudaCreateChannelDesc<float>();
+
+ err = cudaBindTexture(NULL,tex_potential_dot_dot_acoustic, d_potential_dot_dot_acoustic,
+ channelDescFloat, NGLOB*sizeof(float));
+ if (err != cudaSuccess)
+ {
+ fprintf(stderr, "Error in bindTexturesPotential_dot_dot for potential_dot_dot_acoustic: %s\n", cudaGetErrorString(err));
+ exit(1);
+ }
+ }
+
+#endif
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// setters for these const arrays (very ugly hack, but will have to do)
+
+// elastic
+void setConst_hprime_xx(float* array,Mesh* mp);
+void setConst_hprime_yy(float* array,Mesh* mp);
+void setConst_hprime_zz(float* array,Mesh* mp);
+
+void setConst_hprimewgll_xx(float* array,Mesh* mp);
+
+void setConst_wgllwgll_xy(float* array,Mesh* mp);
+void setConst_wgllwgll_xz(float* array, Mesh* mp);
+void setConst_wgllwgll_yz(float* array, Mesh* mp);
+
+void exit_on_cuda_error(char* kernel_name);
+void show_free_memory(char* info_str);
+
+#endif //CUDA_HEADER_H
Copied: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu (from rev 19012, seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_mesh_constants_cuda.cu)
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu 2011-10-06 03:31:24 UTC (rev 19027)
@@ -0,0 +1,1126 @@
+#include <stdio.h>
+#include <cuda.h>
+#include <cublas.h>
+#include <mpi.h>
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+#include "prepare_constants_cuda.h"
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Helper functions
+
+/* ----------------------------------------------------------------------------------------------- */
+
+double get_time()
+{
+ struct timeval t;
+ struct timezone tzp;
+ gettimeofday(&t, &tzp);
+ return t.tv_sec + t.tv_usec*1e-6;
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(pause_for_debug,PAUSE_FOR_DEBUG)() {
+TRACE("pause_for_debug");
+
+ pause_for_debugger(1);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+void pause_for_debugger(int pause) {
+ if(pause) {
+ int myrank;
+ MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
+ printf("I'm rank %d\n",myrank);
+ int i = 0;
+ char hostname[256];
+ gethostname(hostname, sizeof(hostname));
+ printf("PID %d on %s:%d ready for attach\n", getpid(), hostname,myrank);
+ FILE *file = fopen("/scratch/eiger/rietmann/attach_gdb.txt","w+");
+ fprintf(file,"PID %d on %s:%d ready for attach\n", getpid(), hostname,myrank);
+ fclose(file);
+ fflush(stdout);
+ while (0 == i)
+ sleep(5);
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+void exit_on_cuda_error(char* kernel_name) {
+ // sync and check to catch errors from previous async operations
+ cudaThreadSynchronize();
+ cudaError_t err = cudaGetLastError();
+ if (err != cudaSuccess)
+ {
+ fprintf(stderr,"Error after %s: %s\n", kernel_name, cudaGetErrorString(err));
+ pause_for_debugger(0);
+ exit(1);
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+void print_CUDA_error_if_any(cudaError_t err, int num)
+{
+ if (cudaSuccess != err)
+ {
+ printf("\nCUDA error !!!!! <%s> !!!!! at CUDA call # %d\n",cudaGetErrorString(err),num);
+ fflush(stdout);
+#ifdef USE_MPI
+ MPI_Abort(MPI_COMM_WORLD,1);
+#endif
+ exit(0);
+ }
+ return;
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+void get_free_memory(double* free_db, double* used_db, double* total_db) {
+
+ // gets memory usage in byte
+ size_t free_byte ;
+ size_t total_byte ;
+ cudaError_t cuda_status = cudaMemGetInfo( &free_byte, &total_byte ) ;
+ if ( cudaSuccess != cuda_status ){
+ printf("Error: cudaMemGetInfo fails, %s \n", cudaGetErrorString(cuda_status) );
+ exit(1);
+ }
+
+ *free_db = (double)free_byte ;
+ *total_db = (double)total_byte ;
+ *used_db = *total_db - *free_db ;
+ return;
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Saves GPU memory usage to file
+void output_free_memory(char* info_str) {
+ int myrank;
+ MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
+ FILE* fp;
+ char filename[BUFSIZ];
+ double free_db,used_db,total_db;
+
+ get_free_memory(&free_db,&used_db,&total_db);
+ // size_t free_byte ;
+ // size_t total_byte ;
+ // cudaError_t cuda_status = cudaMemGetInfo( &free_byte, &total_byte ) ;
+ // if ( cudaSuccess != cuda_status ){
+ // printf("Error: cudaMemGetInfo fails, %s \n", cudaGetErrorString(cuda_status) );
+ // exit(1);
+ // }
+ //
+ // double free_db = (double)free_byte ;
+ // double total_db = (double)total_byte ;
+ // double used_db = total_db - free_db ;
+
+ sprintf(filename,"../in_out_files/OUTPUT_FILES/gpu_mem_usage_proc_%03d.txt",myrank);
+ fp = fopen(filename,"a+");
+ fprintf(fp,"%d: @%s GPU memory usage: used = %f MB, free = %f MB, total = %f MB\n", myrank, info_str,
+ used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
+ fclose(fp);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Fortran-callable version of above method
+extern "C"
+void FC_FUNC_(output_free_device_memory,
+ OUTPUT_FREE_DEVICE_MEMORY)(int* id) {
+TRACE("output_free_device_memory");
+
+ char info[6];
+ sprintf(info,"f %d:",*id);
+ output_free_memory(info);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+void show_free_memory(char* info_str) {
+
+ // show memory usage of GPU
+ int myrank;
+ MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
+ double free_db,used_db,total_db;
+
+ get_free_memory(&free_db,&used_db,&total_db);
+
+// size_t free_byte ;
+// size_t total_byte ;
+// cudaError_t cuda_status = cudaMemGetInfo( &free_byte, &total_byte ) ;
+// if ( cudaSuccess != cuda_status ){
+// printf("Error: cudaMemGetInfo fails, %s \n", cudaGetErrorString(cuda_status) );
+// exit(1);
+// }
+//
+// double free_db = (double)free_byte ;
+// double total_db = (double)total_byte ;
+// double used_db = total_db - free_db ;
+
+ printf("%d: @%s GPU memory usage: used = %f MB, free = %f MB, total = %f MB\n", myrank, info_str,
+ used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(show_free_device_memory,
+ SHOW_FREE_DEVICE_MEMORY)() {
+TRACE("show_free_device_memory");
+
+ show_free_memory("from fortran");
+}
+
+
+extern "C"
+void FC_FUNC_(get_free_device_memory,
+ get_FREE_DEVICE_MEMORY)(float* free, float* used, float* total ) {
+TRACE("get_free_device_memory");
+
+ double free_db,used_db,total_db;
+
+ get_free_memory(&free_db,&used_db,&total_db);
+
+ // converts to MB
+ *free = (float) free_db/1024.0/1024.0;
+ *used = (float) used_db/1024.0/1024.0;
+ *total = (float) total_db/1024.0/1024.0;
+ return;
+}
+
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// GPU preparation
+
+/* ----------------------------------------------------------------------------------------------- */
+
+//void prepare_constants(int NGLLX, int NSPEC_AB, int NGLOB_AB,
+// float* h_xix, float* h_xiy, float* h_xiz,
+// float** d_xix, float** d_xiy, float** d_xiz,
+// float* h_etax, float* h_etay, float* h_etaz,
+// float** d_etax, float** d_etay, float** d_etaz,
+// float* h_gammax, float* h_gammay, float* h_gammaz,
+// float** d_gammax, float** d_gammay, float** d_gammaz,
+// float* h_kappav, float* h_muv,
+// float** d_kappav, float** d_muv,
+// int* h_ibool, int** d_ibool,
+// //int* h_phase_ispec_inner_elastic, int** d_phase_ispec_inner_elastic,
+// //int num_phase_ispec_elastic,
+// //float* h_rmass, float** d_rmass,
+// int num_interfaces_ext_mesh, int max_nibool_interfaces_ext_mesh,
+// int* h_nibool_interfaces_ext_mesh, int** d_nibool_interfaces_ext_mesh,
+// int* h_ibool_interfaces_ext_mesh, int** d_ibool_interfaces_ext_mesh,
+// float* h_hprime_xx, float* h_hprimewgll_xx,
+// float* h_wgllwgll_xy, float* h_wgllwgll_xz,
+// float* h_wgllwgll_yz,
+// int* h_abs_boundary_ispec, int** d_abs_boundary_ispec,
+// int* h_abs_boundary_ijk, int** d_abs_boundary_ijk,
+// float* h_abs_boundary_normal, float** d_abs_boundary_normal,
+// //float* h_rho_vp,float** d_rho_vp,
+// //float* h_rho_vs,float** d_rho_vs,
+// float* h_abs_boundary_jacobian2Dw,float** d_abs_boundary_jacobian2Dw,
+// float* h_b_absorb_field,float** d_b_absorb_field,
+// int num_abs_boundary_faces, int b_num_abs_boundary_faces,
+// int* h_ispec_is_inner, int** d_ispec_is_inner,
+// //int* h_ispec_is_elastic, int** d_ispec_is_elastic,
+// int NSOURCES,
+// float* h_sourcearrays,float** d_sourcearrays,
+// int* h_islice_selected_source, int** d_islice_selected_source,
+// int* h_ispec_selected_source, int** d_ispec_selected_source,
+// int SIMULATION_TYPE){
+//
+//TRACE("prepare_constants");
+//
+// // EPIK_USER_REG(r_name,"compute_forces");
+// // EPIK_USER_REG(r_name,
+//
+// /* Assuming NGLLX=5. Padded is then 128 (5^3+3) */
+// int size_padded = 128*NSPEC_AB;
+// int size = NGLLX*NGLLX*NGLLX*NSPEC_AB;
+//
+// // mesh
+// print_CUDA_error_if_any(cudaMalloc((void**) d_xix, size_padded*sizeof(float)),5);
+// print_CUDA_error_if_any(cudaMalloc((void**) d_xiy, size_padded*sizeof(float)),6);
+// print_CUDA_error_if_any(cudaMalloc((void**) d_xiz, size_padded*sizeof(float)),7);
+// print_CUDA_error_if_any(cudaMalloc((void**) d_etax, size_padded*sizeof(float)),8);
+// print_CUDA_error_if_any(cudaMalloc((void**) d_etay, size_padded*sizeof(float)),9);
+// print_CUDA_error_if_any(cudaMalloc((void**) d_etaz, size_padded*sizeof(float)),10);
+// print_CUDA_error_if_any(cudaMalloc((void**) d_gammax, size_padded*sizeof(float)),11);
+// print_CUDA_error_if_any(cudaMalloc((void**) d_gammay, size_padded*sizeof(float)),12);
+// print_CUDA_error_if_any(cudaMalloc((void**) d_gammaz, size_padded*sizeof(float)),13);
+// print_CUDA_error_if_any(cudaMalloc((void**) d_kappav, size_padded*sizeof(float)),14);
+// print_CUDA_error_if_any(cudaMalloc((void**) d_muv, size_padded*sizeof(float)),15);
+// print_CUDA_error_if_any(cudaMalloc((void**) d_ibool, size_padded*sizeof(int)),16);
+//
+//// print_CUDA_error_if_any(cudaMalloc((void**) d_phase_ispec_inner_elastic, num_phase_ispec_elastic*2*sizeof(int)),17);
+//// print_CUDA_error_if_any(cudaMalloc((void**) d_rmass, NGLOB_AB*sizeof(float)),17);
+//
+// // absorbing boundaries
+// if( num_abs_boundary_faces > 0 ){
+// print_CUDA_error_if_any(cudaMalloc((void**) d_abs_boundary_ispec,
+// num_abs_boundary_faces*sizeof(int)),769);
+// print_CUDA_error_if_any(cudaMemcpy(*d_abs_boundary_ispec, h_abs_boundary_ispec,
+// num_abs_boundary_faces*sizeof(int),cudaMemcpyHostToDevice),770);
+//
+// print_CUDA_error_if_any(cudaMalloc((void**) d_abs_boundary_ijk,
+// 3*25*num_abs_boundary_faces*sizeof(int)),772);
+// print_CUDA_error_if_any(cudaMemcpy(*d_abs_boundary_ijk, h_abs_boundary_ijk,
+// 3*25*num_abs_boundary_faces*sizeof(int),cudaMemcpyHostToDevice),773);
+//
+// print_CUDA_error_if_any(cudaMalloc((void**) d_abs_boundary_normal,
+// 3*25*num_abs_boundary_faces*sizeof(int)),783);
+// print_CUDA_error_if_any(cudaMemcpy(*d_abs_boundary_normal, h_abs_boundary_normal,
+// 3*25*num_abs_boundary_faces*sizeof(int),cudaMemcpyHostToDevice),783);
+//
+// print_CUDA_error_if_any(cudaMalloc((void**) d_abs_boundary_jacobian2Dw,
+// 25*num_abs_boundary_faces*sizeof(float)),784);
+// print_CUDA_error_if_any(cudaMemcpy(*d_abs_boundary_jacobian2Dw, h_abs_boundary_jacobian2Dw,
+// 25*num_abs_boundary_faces*sizeof(float),cudaMemcpyHostToDevice),784);
+// }
+//
+//
+///*
+// print_CUDA_error_if_any(cudaMalloc((void**) d_rho_vp, size*sizeof(float)),5);
+// print_CUDA_error_if_any(cudaMalloc((void**) d_rho_vs, size*sizeof(float)),6);
+// print_CUDA_error_if_any(cudaMemcpy(*d_rho_vp,h_rho_vp,size*sizeof(float),
+// cudaMemcpyHostToDevice),5);
+// print_CUDA_error_if_any(cudaMemcpy(*d_rho_vs,h_rho_vs,size*sizeof(float),
+// cudaMemcpyHostToDevice),5);
+// print_CUDA_error_if_any(cudaMalloc((void**) d_b_absorb_field, 3*25*b_num_abs_boundary_faces*sizeof(float)),7);
+// print_CUDA_error_if_any(cudaMemcpy(*d_b_absorb_field, h_b_absorb_field,
+// 3*25*b_num_abs_boundary_faces*sizeof(float),
+// cudaMemcpyHostToDevice),7);
+//
+// print_CUDA_error_if_any(cudaMemcpy(*d_rmass,h_rmass,NGLOB_AB*sizeof(float),cudaMemcpyHostToDevice),18);
+//*/
+//
+// // prepare interprocess-edge exchange information
+// print_CUDA_error_if_any(cudaMalloc((void**) d_nibool_interfaces_ext_mesh,
+// num_interfaces_ext_mesh*sizeof(int)),19);
+// print_CUDA_error_if_any(cudaMemcpy(*d_nibool_interfaces_ext_mesh,h_nibool_interfaces_ext_mesh,
+// num_interfaces_ext_mesh*sizeof(int),cudaMemcpyHostToDevice),19);
+//
+// print_CUDA_error_if_any(cudaMalloc((void**) d_ibool_interfaces_ext_mesh,
+// num_interfaces_ext_mesh*max_nibool_interfaces_ext_mesh*sizeof(int)),20);
+// print_CUDA_error_if_any(cudaMemcpy(*d_ibool_interfaces_ext_mesh,h_ibool_interfaces_ext_mesh,
+// num_interfaces_ext_mesh*max_nibool_interfaces_ext_mesh*sizeof(int),
+// cudaMemcpyHostToDevice),20);
+//
+// print_CUDA_error_if_any(cudaMalloc((void**) d_ispec_is_inner,NSPEC_AB*sizeof(int)),21);
+// print_CUDA_error_if_any(cudaMemcpy(*d_ispec_is_inner, h_ispec_is_inner,
+// NSPEC_AB*sizeof(int),
+// cudaMemcpyHostToDevice),21);
+//
+///* print_CUDA_error_if_any(cudaMalloc((void**) d_ispec_is_elastic,NSPEC_AB*sizeof(int)),21);
+// print_CUDA_error_if_any(cudaMemcpy(*d_ispec_is_elastic, h_ispec_is_elastic,
+// NSPEC_AB*sizeof(int),
+// cudaMemcpyHostToDevice),21);
+//*/
+//
+// print_CUDA_error_if_any(cudaMemcpy(*d_ibool, h_ibool,
+// size*sizeof(int) ,cudaMemcpyHostToDevice),512);
+//
+// // sources
+// if (SIMULATION_TYPE == 1 || SIMULATION_TYPE == 3){
+// print_CUDA_error_if_any(cudaMalloc((void**)d_sourcearrays, sizeof(float)*NSOURCES*3*125),22);
+// print_CUDA_error_if_any(cudaMemcpy(*d_sourcearrays, h_sourcearrays, sizeof(float)*NSOURCES*3*125,
+// cudaMemcpyHostToDevice),522);
+// }
+//
+//
+// print_CUDA_error_if_any(cudaMalloc((void**)d_islice_selected_source, sizeof(int)*NSOURCES),23);
+// print_CUDA_error_if_any(cudaMemcpy(*d_islice_selected_source, h_islice_selected_source, sizeof(int)*NSOURCES,
+// cudaMemcpyHostToDevice),523);
+//
+// print_CUDA_error_if_any(cudaMalloc((void**)d_ispec_selected_source, sizeof(int)*NSOURCES),24);
+// print_CUDA_error_if_any(cudaMemcpy(*d_ispec_selected_source, h_ispec_selected_source,sizeof(int)*NSOURCES,
+// cudaMemcpyHostToDevice),524);
+//
+// // transfer constant element data with padding
+// for(int i=0;i<NSPEC_AB;i++) {
+// print_CUDA_error_if_any(cudaMemcpy(*d_xix + i*128, &h_xix[i*125],
+// 125*sizeof(float),cudaMemcpyHostToDevice),70);
+// print_CUDA_error_if_any(cudaMemcpy(*d_xiy+i*128, &h_xiy[i*125],
+// 125*sizeof(float),cudaMemcpyHostToDevice),71);
+// print_CUDA_error_if_any(cudaMemcpy(*d_xiz+i*128, &h_xiz[i*125],
+// 125*sizeof(float),cudaMemcpyHostToDevice),72);
+// print_CUDA_error_if_any(cudaMemcpy(*d_etax+i*128, &h_etax[i*125],
+// 125*sizeof(float),cudaMemcpyHostToDevice),73);
+// print_CUDA_error_if_any(cudaMemcpy(*d_etay+i*128, &h_etay[i*125],
+// 125*sizeof(float),cudaMemcpyHostToDevice),74);
+// print_CUDA_error_if_any(cudaMemcpy(*d_etaz+i*128, &h_etaz[i*125],
+// 125*sizeof(float),cudaMemcpyHostToDevice),75);
+// print_CUDA_error_if_any(cudaMemcpy(*d_gammax+i*128,&h_gammax[i*125],
+// 125*sizeof(float),cudaMemcpyHostToDevice),76);
+// print_CUDA_error_if_any(cudaMemcpy(*d_gammay+i*128,&h_gammay[i*125],
+// 125*sizeof(float),cudaMemcpyHostToDevice),78);
+// print_CUDA_error_if_any(cudaMemcpy(*d_gammaz+i*128,&h_gammaz[i*125],
+// 125*sizeof(float),cudaMemcpyHostToDevice),79);
+// print_CUDA_error_if_any(cudaMemcpy(*d_kappav+i*128,&h_kappav[i*125],
+// 125*sizeof(float),cudaMemcpyHostToDevice),80);
+// print_CUDA_error_if_any(cudaMemcpy(*d_muv+i*128, &h_muv[i*125],
+// 125*sizeof(float),cudaMemcpyHostToDevice),81);
+// }
+//
+//// print_CUDA_error_if_any(cudaMemcpy(*d_phase_ispec_inner_elastic, h_phase_ispec_inner_elastic, num_phase_ispec_elastic*2*sizeof(int),cudaMemcpyHostToDevice),13);
+//
+//#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+// exit_on_cuda_error("prepare_constants");
+//#endif
+//}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(prepare_constants_device,
+ PREPARE_CONSTANTS_DEVICE)(long* Mesh_pointer,
+ int* h_NGLLX,
+ int* NSPEC_AB, int* NGLOB_AB,
+ float* h_xix, float* h_xiy, float* h_xiz,
+ float* h_etax, float* h_etay, float* h_etaz,
+ float* h_gammax, float* h_gammay, float* h_gammaz,
+ float* h_kappav, float* h_muv,
+ int* h_ibool,
+ //int* h_phase_ispec_inner_elastic,int* num_phase_ispec_elastic,
+ //float* h_rmass,
+ int* num_interfaces_ext_mesh, int* max_nibool_interfaces_ext_mesh,
+ int* h_nibool_interfaces_ext_mesh, int* h_ibool_interfaces_ext_mesh,
+ float* h_hprime_xx,
+ float* h_hprime_yy,
+ float* h_hprime_zz,
+ float* h_hprimewgll_xx,
+ float* h_wgllwgll_xy,
+ float* h_wgllwgll_xz,
+ float* h_wgllwgll_yz,
+ //float* h_hprime_xx, float* h_hprimewgll_xx,
+ //float* h_wgllwgll_xy, float* h_wgllwgll_xz,
+ //float* h_wgllwgll_yz,
+ int* h_abs_boundary_ispec, int* h_abs_boundary_ijk,
+ float* h_abs_boundary_normal,
+ //float* h_rho_vp,
+ //float* h_rho_vs,
+ float* h_abs_boundary_jacobian2Dw,
+ float* h_b_absorb_field,
+ int* num_abs_boundary_faces, int* b_num_abs_boundary_faces,
+ int* h_ispec_is_inner,
+ //int* h_ispec_is_elastic,
+ int* NSOURCES,
+ float* h_sourcearrays,
+ int* h_islice_selected_source,
+ int* h_ispec_selected_source,
+ int* h_number_receiver_global,
+ int* h_ispec_selected_rec,
+ int* nrec_f,
+ int* nrec_local_f,
+ int* SIMULATION_TYPE) {
+
+TRACE("prepare_constants_device");
+
+ int device_count,procid;
+
+ // cuda initialization (needs -lcuda library)
+ cuInit(0);
+
+ // Gets number of GPU devices
+ cudaGetDeviceCount(&device_count);
+ //printf("Cuda Devices: %d\n", device_count);
+
+ // Gets rank number of MPI process
+ MPI_Comm_rank(MPI_COMM_WORLD, &procid);
+
+ // Sets the active device
+ if(device_count > 1) {
+ // daniel: todo - generalize for more GPUs per node?
+ // assumes we have 2 GPU devices per node and running 2 MPI processes per node as well
+ cudaSetDevice((procid)%2);
+ exit_on_cuda_error("cudaSetDevice");
+ }
+
+ //printf("GPU_MODE Active. Preparing Fields and Constants on Device.\n");
+
+ // allocates mesh parameter structure
+ Mesh* mp = (Mesh*)malloc(sizeof(Mesh));
+ *Mesh_pointer = (long)mp;
+
+ // checks if NGLLX == 5
+ if( *h_NGLLX != NGLLX ){
+ exit_on_cuda_error("NGLLX must be 5 for CUDA devices");
+ }
+
+ // sets global parameters
+ mp->NSPEC_AB = *NSPEC_AB;
+ mp->NGLOB_AB = *NGLOB_AB;
+
+ //mp->d_num_phase_ispec_elastic = *num_phase_ispec_elastic;
+
+ // sets constant arrays
+ setConst_hprime_xx(h_hprime_xx,mp);
+ setConst_hprime_yy(h_hprime_yy,mp);
+ setConst_hprime_zz(h_hprime_zz,mp);
+ setConst_hprimewgll_xx(h_hprimewgll_xx,mp);
+ setConst_wgllwgll_xy(h_wgllwgll_xy,mp);
+ setConst_wgllwgll_xz(h_wgllwgll_xz,mp);
+ setConst_wgllwgll_yz(h_wgllwgll_yz,mp);
+
+/* setConst_hprime_xx (h_hprime_xx );
+ setConst_hprimewgll_xx(h_hprimewgll_xx);
+ setConst_wgllwgll_xy (h_wgllwgll_xy,mp);
+ setConst_wgllwgll_xz (h_wgllwgll_xz,mp);
+ setConst_wgllwgll_yz (h_wgllwgll_yz,mp);
+*/
+
+ /* Assuming NGLLX=5. Padded is then 128 (5^3+3) */
+ int size_padded = 128 * (*NSPEC_AB);
+ int size = 125 * (*NSPEC_AB);
+
+ // mesh
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xix, size_padded*sizeof(float)),5);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xiy, size_padded*sizeof(float)),6);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xiz, size_padded*sizeof(float)),7);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etax, size_padded*sizeof(float)),8);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etay, size_padded*sizeof(float)),9);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etaz, size_padded*sizeof(float)),10);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammax, size_padded*sizeof(float)),11);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammay, size_padded*sizeof(float)),12);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammaz, size_padded*sizeof(float)),13);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_kappav, size_padded*sizeof(float)),14);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_muv, size_padded*sizeof(float)),15);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibool, size_padded*sizeof(int)),16);
+
+
+// print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_phase_ispec_inner_elastic, *num_phase_ispec_elastic*2*sizeof(int)),17);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_phase_ispec_inner_elastic, h_phase_ispec_inner_elastic, *num_phase_ispec_elastic*2*sizeof(int),cudaMemcpyHostToDevice),13);
+
+// print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_rmass, *NGLOB_AB*sizeof(float)),17);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass,h_rmass,*NGLOB_AB*sizeof(float),cudaMemcpyHostToDevice),18);
+
+ // absorbing boundaries
+ if( *num_abs_boundary_faces > 0 ){
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_abs_boundary_ispec,
+ (*num_abs_boundary_faces)*sizeof(int)),771);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_ispec, h_abs_boundary_ispec,
+ (*num_abs_boundary_faces)*sizeof(int),
+ cudaMemcpyHostToDevice),771);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_abs_boundary_ijk,
+ 3*25*(*num_abs_boundary_faces)*sizeof(int)),772);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_ijk, h_abs_boundary_ijk,
+ 3*25*(*num_abs_boundary_faces)*sizeof(int),
+ cudaMemcpyHostToDevice),772);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_abs_boundary_normal,
+ 3*25*(*num_abs_boundary_faces)*sizeof(int)),773);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_normal, h_abs_boundary_normal,
+ 3*25*(*num_abs_boundary_faces)*sizeof(int),
+ cudaMemcpyHostToDevice),773);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_abs_boundary_jacobian2Dw,
+ 25*(*num_abs_boundary_faces)*sizeof(float)),774);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_jacobian2Dw, h_abs_boundary_jacobian2Dw,
+ 25*(*num_abs_boundary_faces)*sizeof(float),
+ cudaMemcpyHostToDevice),774);
+ }
+
+/*
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_rho_vp, size*sizeof(float)),5);
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_rho_vs, size*sizeof(float)),6);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_vp,h_rho_vp,size*sizeof(float),
+ cudaMemcpyHostToDevice),5);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_vs,h_rho_vs,size*sizeof(float),
+ cudaMemcpyHostToDevice),5);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_absorb_field, 3*25* *b_num_abs_boundary_faces*sizeof(float)),7);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_field, h_b_absorb_field,
+ 3*25* *b_num_abs_boundary_faces*sizeof(float),
+ cudaMemcpyHostToDevice),7);
+*/
+
+ // prepare interprocess-edge exchange information
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nibool_interfaces_ext_mesh,
+ *num_interfaces_ext_mesh*sizeof(int)),19);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_nibool_interfaces_ext_mesh,h_nibool_interfaces_ext_mesh,
+ *num_interfaces_ext_mesh*sizeof(int),cudaMemcpyHostToDevice),19);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibool_interfaces_ext_mesh,
+ *num_interfaces_ext_mesh* *max_nibool_interfaces_ext_mesh*
+ sizeof(int)),20);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ibool_interfaces_ext_mesh,h_ibool_interfaces_ext_mesh,
+ *num_interfaces_ext_mesh* *max_nibool_interfaces_ext_mesh*sizeof(int),
+ cudaMemcpyHostToDevice),20);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ispec_is_inner,*NSPEC_AB*sizeof(int)),21);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_inner, h_ispec_is_inner,
+ *NSPEC_AB*sizeof(int),
+ cudaMemcpyHostToDevice),21);
+
+// print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ispec_is_elastic,*NSPEC_AB*sizeof(int)),21);
+// print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_elastic, h_ispec_is_elastic,
+// *NSPEC_AB*sizeof(int),
+// cudaMemcpyHostToDevice),21);
+
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ibool, h_ibool,
+ size*sizeof(int) ,cudaMemcpyHostToDevice),22);
+
+ // sources
+ if (*SIMULATION_TYPE == 1 || *SIMULATION_TYPE == 3){
+ // not needed in case of pure adjoint simulations (SIMULATION_TYPE == 2)
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_sourcearrays, sizeof(float)* *NSOURCES*3*125),522);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_sourcearrays, h_sourcearrays, sizeof(float)* *NSOURCES*3*125,
+ cudaMemcpyHostToDevice),522);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_stf_pre_compute),
+ *NSOURCES*sizeof(double)),525);
+ }
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_islice_selected_source, sizeof(int) * *NSOURCES),523);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_islice_selected_source, h_islice_selected_source, sizeof(int)* *NSOURCES,
+ cudaMemcpyHostToDevice),523);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_ispec_selected_source, sizeof(int)* *NSOURCES),524);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_selected_source, h_ispec_selected_source,sizeof(int)* *NSOURCES,
+ cudaMemcpyHostToDevice),524);
+
+
+ // transfer constant element data with padding
+ for(int i=0;i<*NSPEC_AB;i++) {
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_xix + i*128, &h_xix[i*125],
+ 125*sizeof(float),cudaMemcpyHostToDevice),70);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_xiy+i*128, &h_xiy[i*125],
+ 125*sizeof(float),cudaMemcpyHostToDevice),71);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_xiz+i*128, &h_xiz[i*125],
+ 125*sizeof(float),cudaMemcpyHostToDevice),72);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_etax+i*128, &h_etax[i*125],
+ 125*sizeof(float),cudaMemcpyHostToDevice),73);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_etay+i*128, &h_etay[i*125],
+ 125*sizeof(float),cudaMemcpyHostToDevice),74);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_etaz+i*128, &h_etaz[i*125],
+ 125*sizeof(float),cudaMemcpyHostToDevice),75);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_gammax+i*128,&h_gammax[i*125],
+ 125*sizeof(float),cudaMemcpyHostToDevice),76);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_gammay+i*128,&h_gammay[i*125],
+ 125*sizeof(float),cudaMemcpyHostToDevice),77);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_gammaz+i*128,&h_gammaz[i*125],
+ 125*sizeof(float),cudaMemcpyHostToDevice),78);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_kappav+i*128,&h_kappav[i*125],
+ 125*sizeof(float),cudaMemcpyHostToDevice),79);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_muv+i*128, &h_muv[i*125],
+ 125*sizeof(float),cudaMemcpyHostToDevice),80);
+
+ }
+
+
+ // receiver stations
+ int nrec = *nrec_f; // total number of receivers
+ int nrec_local = *nrec_local_f; // number of receiver located in this partition
+ // note that:
+ // size(number_receiver_global) = nrec_local
+ // size(ispec_selected_rec) = nrec
+ mp->nrec_local = nrec_local;
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_number_receiver_global),nrec_local*sizeof(int)),1);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_number_receiver_global,h_number_receiver_global,
+ nrec_local*sizeof(int),cudaMemcpyHostToDevice),602);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ispec_selected_rec),nrec*sizeof(int)),603);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_selected_rec,h_ispec_selected_rec,
+ nrec*sizeof(int),cudaMemcpyHostToDevice),604);
+
+// print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_station_seismo_field),3*125*nrec_local*sizeof(float)),605);
+// mp->h_station_seismo_field = (float*)malloc(3*125*nrec_local*sizeof(float));
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("prepare_constants_device");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(prepare_and_transfer_noise_backward_fields,
+ PREPARE_AND_TRANSFER_NOISE_BACKWARD_FIELDS)(long* Mesh_pointer_f,
+ int* size,
+ real* b_displ,
+ real* b_veloc,
+ real* b_accel,
+ real* b_epsilondev_xx,
+ real* b_epsilondev_yy,
+ real* b_epsilondev_xy,
+ real* b_epsilondev_xz,
+ real* b_epsilondev_yz,
+ int* NSPEC_STRAIN_ONLY) {
+
+TRACE("prepare_and_transfer_noise_backward_fields_");
+
+ //show_free_memory("prep_and_xfer_noise_bwd_fields");
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+ int epsilondev_size = 128*(*NSPEC_STRAIN_ONLY);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_displ),*size*sizeof(real)),1);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_veloc),*size*sizeof(real)),2);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_accel),*size*sizeof(real)),3);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xx),
+ epsilondev_size*sizeof(real)),4);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_yy),
+ epsilondev_size*sizeof(real)),4);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xy),
+ epsilondev_size*sizeof(real)),4);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xz),
+ epsilondev_size*sizeof(real)),4);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_yz),
+ epsilondev_size*sizeof(real)),4);
+
+
+ cudaMemcpy(mp->d_b_displ,b_displ,*size*sizeof(real),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_b_veloc,b_veloc,*size*sizeof(real),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_b_accel,b_accel,*size*sizeof(real),cudaMemcpyHostToDevice);
+
+ cudaMemcpy(mp->d_b_epsilondev_xx,b_epsilondev_xx,
+ epsilondev_size*sizeof(real),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_b_epsilondev_yy,b_epsilondev_yy,
+ epsilondev_size*sizeof(real),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_b_epsilondev_xy,b_epsilondev_xy,
+ epsilondev_size*sizeof(real),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_b_epsilondev_xz,b_epsilondev_xz,
+ epsilondev_size*sizeof(real),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_b_epsilondev_yz,b_epsilondev_yz,
+ epsilondev_size*sizeof(real),cudaMemcpyHostToDevice);
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(prepare_and_transfer_noise_backward_constants,
+ PREPARE_AND_TRANSFER_NOISE_BACKWARD_CONSTANTS)(long* Mesh_pointer_f,
+ float* normal_x_noise,
+ float* normal_y_noise,
+ float* normal_z_noise,
+ float* mask_noise,
+ float* free_surface_jacobian2Dw,
+ int* nfaces_surface_ext_mesh
+ ) {
+
+TRACE("prepare_and_transfer_noise_backward_constants_");
+
+ //show_free_memory("prep_and_xfer_noise_bwd_constants");
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ int nface_size = 5*5*(*nfaces_surface_ext_mesh);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_x_noise,
+ nface_size*sizeof(float)),1);
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_y_noise,
+ nface_size*sizeof(float)),2);
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_z_noise,
+ nface_size*sizeof(float)),3);
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_mask_noise, nface_size*sizeof(float)),4);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_free_surface_jacobian2Dw,
+ nface_size*sizeof(float)),5);
+
+ cudaMemcpy(mp->d_normal_x_noise, normal_x_noise, nface_size*sizeof(float),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_normal_y_noise, normal_y_noise, nface_size*sizeof(float),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_normal_z_noise, normal_z_noise, nface_size*sizeof(float),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_mask_noise, mask_noise, nface_size*sizeof(float),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_free_surface_jacobian2Dw, free_surface_jacobian2Dw, nface_size*sizeof(float),cudaMemcpyHostToDevice);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ printf("jacobian_size = %d\n",25*(*nfaces_surface_ext_mesh));
+ exit_on_cuda_error("prepare_and_transfer_noise_backward_constants_");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(prepare_noise_constants_device,
+ PREPARE_NOISE_CONSTANTS_DEVICE)(long* Mesh_pointer_f,
+ int* h_NGLLX,
+ int* NSPEC_AB, int* NGLOB_AB,
+ int* free_surface_ispec,int* free_surface_ijk,
+ int* num_free_surface_faces,
+ int* size_free_surface_ijk, int* SIMULATION_TYPE) {
+
+TRACE("prepare_noise_constants_device_");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+
+ mp->num_free_surface_faces = *num_free_surface_faces;
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_free_surface_ispec, *num_free_surface_faces*sizeof(int)),1);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ispec, free_surface_ispec, *num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),1);
+
+ // alloc storage for the surface buffer to be copied
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_noise_surface_movie, 3*25*(*num_free_surface_faces)*sizeof(float)),1);
+
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_free_surface_ijk, (*size_free_surface_ijk)*sizeof(float)),1);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ijk,free_surface_ijk,(*size_free_surface_ijk)*sizeof(float),cudaMemcpyHostToDevice),1);
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(prepare_sensitivity_kernels,
+ PREPARE_SENSITIVITY_KERNELS)(long* Mesh_pointer_f,
+ float* rho_kl,
+ float* mu_kl,
+ float* kappa_kl,
+ float* epsilon_trace_over_3,
+ float* b_epsilon_trace_over_3,
+ float* Sigma_kl,
+ int* NSPEC_ADJOINTf) {
+
+TRACE("prepare_sensitivity_kernels_");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+ int NSPEC_ADJOINT = *NSPEC_ADJOINTf;
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_kl),
+ 125*mp->NSPEC_AB*sizeof(float)),800);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_mu_kl),
+ 125*mp->NSPEC_AB*sizeof(float)),801);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_kappa_kl),
+ 125*mp->NSPEC_AB*sizeof(float)),802);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_epsilon_trace_over_3),
+ 125*mp->NSPEC_AB*sizeof(float)),803);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilon_trace_over_3),
+ 125*mp->NSPEC_AB*sizeof(float)),804);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_Sigma_kl),
+ 125*(mp->NSPEC_AB)*sizeof(float)),805);
+
+ cudaMemcpy(mp->d_rho_kl,rho_kl, 125*NSPEC_ADJOINT*sizeof(float),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_mu_kl,mu_kl, 125*NSPEC_ADJOINT*sizeof(float),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_kappa_kl,kappa_kl, 125*NSPEC_ADJOINT*sizeof(float),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_epsilon_trace_over_3,epsilon_trace_over_3,
+ 125*NSPEC_ADJOINT*sizeof(float),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_b_epsilon_trace_over_3 ,b_epsilon_trace_over_3,
+ 125*NSPEC_ADJOINT*sizeof(float),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_Sigma_kl, Sigma_kl, 125*(NSPEC_ADJOINT)*sizeof(float),
+ cudaMemcpyHostToDevice);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("prepare_sensitivity_kernels");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(prepare_adjoint_constants_device,
+ PREPARE_ADJOINT_CONSTANTS_DEVICE)(long* Mesh_pointer_f,
+ //int* ispec_selected_rec,
+ //int* islice_selected_rec,
+ //int* islice_selected_rec_size,
+ //int* nrec,
+ float* noise_sourcearray,
+ int* NSTEP,
+ float* epsilondev_xx,
+ float* epsilondev_yy,
+ float* epsilondev_xy,
+ float* epsilondev_xz,
+ float* epsilondev_yz,
+ int* NSPEC_STRAIN_ONLY
+ ) {
+TRACE("prepare_adjoint_constants_device_");
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("prepare_adjoint_constants_device 1");
+#endif
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+ int epsilondev_size = 128*(*NSPEC_STRAIN_ONLY);
+
+ // already done earlier
+ // print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_ispec_selected_rec,
+ // *nrec*sizeof(int)),1);
+ // cudaMemcpy(mp->d_ispec_selected_rec,ispec_selected_rec, *nrec*sizeof(int),
+ // cudaMemcpyHostToDevice);
+
+ //print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_islice_selected_rec,
+ // *islice_selected_rec_size*sizeof(int)),901);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_noise_sourcearray,
+ 3*125*(*NSTEP)*sizeof(float)),902);
+
+
+ cudaMemcpy(mp->d_noise_sourcearray, noise_sourcearray,
+ 3*125*(*NSTEP)*sizeof(float),
+ cudaMemcpyHostToDevice);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("prepare_adjoint_constants_device 2");
+#endif
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_xx,
+ epsilondev_size*sizeof(float)),903);
+ cudaMemcpy(mp->d_epsilondev_xx,epsilondev_xx,epsilondev_size*sizeof(float),
+ cudaMemcpyHostToDevice);
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_yy,
+ epsilondev_size*sizeof(float)),904);
+ cudaMemcpy(mp->d_epsilondev_yy,epsilondev_yy,epsilondev_size*sizeof(float),
+ cudaMemcpyHostToDevice);
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_xy,
+ epsilondev_size*sizeof(float)),905);
+ cudaMemcpy(mp->d_epsilondev_xy,epsilondev_xy,epsilondev_size*sizeof(float),
+ cudaMemcpyHostToDevice);
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_xz,
+ epsilondev_size*sizeof(float)),906);
+ cudaMemcpy(mp->d_epsilondev_xz,epsilondev_xz,epsilondev_size*sizeof(float),
+ cudaMemcpyHostToDevice);
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_yz,
+ epsilondev_size*sizeof(float)),907);
+ cudaMemcpy(mp->d_epsilondev_yz,epsilondev_yz,epsilondev_size*sizeof(float),
+ cudaMemcpyHostToDevice);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("prepare_adjoint_constants_device 3");
+#endif
+
+ // these don't seem necessary and crash code for NOISE_TOMOGRAPHY >
+ // 0 b/c rho_kl, etc not yet allocated when NT=1
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(prepare_adjoint_sim2_or_3_constants_device,
+ PREPARE_ADJOINT_SIM2_OR_3_CONSTANTS_DEVICE)(
+ long* Mesh_pointer_f,
+ int* islice_selected_rec,
+ int* islice_selected_rec_size) {
+
+TRACE("prepare_adjoint_sim2_or_3_constants_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+
+ // allocates arrays for receivers
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_islice_selected_rec,
+ *islice_selected_rec_size*sizeof(int)),802);
+
+ // copies arrays to GPU device
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_islice_selected_rec,islice_selected_rec,
+ *islice_selected_rec_size*sizeof(int),cudaMemcpyHostToDevice),804);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("prepare_adjoint_sim2_or_3_constants_device");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+/*
+extern "C" {
+ void prepare_fields_device_(long* Mesh_pointer_f, int* size);
+ void transfer_fields_to_device_(int* size, float* displ, float* veloc, float* accel,long* Mesh_pointer_f);
+ void transfer_fields_from_device_(int* size, float* displ, float* veloc, float* accel,long* Mesh_pointer_f);
+}
+*/
+
+/* ----------------------------------------------------------------------------------------------- */
+
+/*
+void prepare_fields_device_(long* Mesh_pointer_f, int* size) {
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_displ),sizeof(float)*(*size)),0);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_veloc),sizeof(float)*(*size)),1);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_accel),sizeof(float)*(*size)),2);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_send_accel_buffer),sizeof(float)*(*size)),2);
+
+}
+*/
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// for ELASTIC simulations
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(prepare_fields_elastic_device,
+ PREPARE_FIELDS_ELASTIC_DEVICE)(long* Mesh_pointer_f,
+ int* size,
+ float* rmass,
+ float* rho_vp,
+ float* rho_vs,
+ int* num_phase_ispec_elastic,
+ int* phase_ispec_inner_elastic,
+ int* ispec_is_elastic,
+ int* ABSORBING_CONDITIONS,
+ float* h_b_absorb_field,
+ int* b_num_abs_boundary_faces){
+
+TRACE("prepare_fields_elastic_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+ /* Assuming NGLLX==5. Padded is then 128 (5^3+3) */
+ int size_padded = 128 * mp->NSPEC_AB;
+ int size_nonpadded = 125 * mp->NSPEC_AB;
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_displ),sizeof(float)*(*size)),200);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_veloc),sizeof(float)*(*size)),201);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_accel),sizeof(float)*(*size)),202);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_send_accel_buffer),sizeof(float)*(*size)),203);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass),sizeof(float)*mp->NGLOB_AB),204);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_vp),size_padded*sizeof(float)),205);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_vs),size_padded*sizeof(float)),206);
+
+ mp->d_num_phase_ispec_elastic = *num_phase_ispec_elastic;
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_phase_ispec_inner_elastic), mp->d_num_phase_ispec_elastic*2*sizeof(int)),207);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ispec_is_elastic),mp->NSPEC_AB*sizeof(int)),208);
+
+ // transfer element data
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass,rmass,
+ sizeof(float)*mp->NGLOB_AB,cudaMemcpyHostToDevice),209);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_phase_ispec_inner_elastic,phase_ispec_inner_elastic,
+ mp->d_num_phase_ispec_elastic*2*sizeof(int),cudaMemcpyHostToDevice),210);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_elastic,ispec_is_elastic,
+ mp->NSPEC_AB*sizeof(int),cudaMemcpyHostToDevice),211);
+
+ // daniel: not sure if rho_vp, rho_vs needs padding... they are needed for stacey boundary condition
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_vp, rho_vp,
+ size_nonpadded*sizeof(float),cudaMemcpyHostToDevice),212);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_vs, rho_vs,
+ size_nonpadded*sizeof(float),cudaMemcpyHostToDevice),213);
+
+ // for seismograms
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_station_seismo_field),3*125*(mp->nrec_local)*sizeof(float)),214);
+ mp->h_station_seismo_field = (float*)malloc(3*125*(mp->nrec_local)*sizeof(float));
+
+ // absorbing conditions
+ if( *ABSORBING_CONDITIONS == 1 ){
+ mp->b_num_abs_boundary_faces = *b_num_abs_boundary_faces;
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_absorb_field),
+ 3*25*mp->b_num_abs_boundary_faces*sizeof(float)),791);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_field, h_b_absorb_field,
+ 3*25*mp->b_num_abs_boundary_faces*sizeof(float),cudaMemcpyHostToDevice),792);
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("prepare_fields_elastic_device");
+#endif
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// for ACOUSTIC simulations
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(prepare_fields_acoustic_device,
+ PREPARE_FIELDS_ACOUSTIC_DEVICE)(long* Mesh_pointer_f,
+ float* rmass_acoustic,
+ float* rhostore,
+ float* kappastore,
+ int* num_phase_ispec_acoustic,
+ int* phase_ispec_inner_acoustic,
+ int* ispec_is_acoustic,
+ int* num_free_surface_faces,
+ int* free_surface_ispec,
+ int* free_surface_ijk,
+ int* ABSORBING_CONDITIONS,
+ int* b_reclen_potential,
+ float* b_absorb_potential,
+ int* SIMULATION_TYPE,
+ float* rho_ac_kl,
+ float* kappa_ac_kl) {
+
+TRACE("prepare_fields_acoustic_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+ /* Assuming NGLLX==5. Padded is then 128 (5^3+3) */
+ int size_padded = 128 * mp->NSPEC_AB;
+ int size_nonpadded = 125 * mp->NSPEC_AB;
+ int size = mp->NGLOB_AB;
+
+ // allocates arrays on device (GPU)
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_potential_acoustic),sizeof(float)*size),100);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_potential_dot_acoustic),sizeof(float)*size),101);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_potential_dot_dot_acoustic),sizeof(float)*size),102);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_send_potential_dot_dot_buffer),sizeof(float)*size),103);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass_acoustic),sizeof(float)*size),104);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rhostore),size_padded*sizeof(float)),105);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_kappastore),size_padded*sizeof(float)),106);
+
+ mp->num_phase_ispec_acoustic = *num_phase_ispec_acoustic;
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_phase_ispec_inner_acoustic), mp->num_phase_ispec_acoustic*2*sizeof(int)),107);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ispec_is_acoustic),mp->NSPEC_AB*sizeof(int)),108);
+
+ mp->num_free_surface_faces = *num_free_surface_faces;
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ispec),mp->num_free_surface_faces*sizeof(int)),109);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ijk),3*25*mp->num_free_surface_faces*sizeof(int)),110);
+
+ // absorbing boundaries
+ if( *ABSORBING_CONDITIONS == 1 ){
+ mp->d_b_reclen_potential = *b_reclen_potential;
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_absorb_potential),mp->d_b_reclen_potential),111);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_potential,b_absorb_potential,
+ mp->d_b_reclen_potential,cudaMemcpyHostToDevice),112);
+ }
+
+ // kernel simulations
+ if( *SIMULATION_TYPE == 3 ){
+ // allocates backward/reconstructed arrays on device (GPU)
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_potential_acoustic),sizeof(float)*size),113);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_potential_dot_acoustic),sizeof(float)*size),114);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_potential_dot_dot_acoustic),sizeof(float)*size),115);
+
+ // allocates kernels
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_ac_kl),125*mp->NSPEC_AB*sizeof(float)),181);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_kappa_ac_kl),125*mp->NSPEC_AB*sizeof(float)),182);
+ // copies over initial values
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_ac_kl,rho_ac_kl,
+ 125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),183);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_kappa_ac_kl,kappa_ac_kl,
+ 125*mp->NSPEC_AB*sizeof(float),cudaMemcpyHostToDevice),184);
+
+ }
+
+ // transfer element data
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass_acoustic,rmass_acoustic,
+ sizeof(float)*size,cudaMemcpyHostToDevice),116);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_phase_ispec_inner_acoustic,phase_ispec_inner_acoustic,
+ mp->num_phase_ispec_acoustic*2*sizeof(int),cudaMemcpyHostToDevice),117);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_acoustic,ispec_is_acoustic,
+ mp->NSPEC_AB*sizeof(int),cudaMemcpyHostToDevice),118);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ispec,free_surface_ispec,
+ mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),119);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ijk,free_surface_ijk,
+ 3*25*mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),120);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_kappastore,kappastore,
+ size_nonpadded*sizeof(float),cudaMemcpyHostToDevice),121);
+
+ // transfer constant element data with padding
+ for(int i=0;i<mp->NSPEC_AB;i++) {
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_rhostore+i*128, &rhostore[i*125],
+ 125*sizeof(float),cudaMemcpyHostToDevice),122);
+ }
+
+ // for seismograms
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_station_seismo_potential),mp->nrec_local*125*sizeof(float)),123);
+ mp->h_station_seismo_potential = (float*)malloc(mp->nrec_local*125*sizeof(float));
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("prepare_fields_acoustic_device");
+#endif
+}
+
Copied: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/save_and_compare_cpu_vs_gpu.c (from rev 19012, seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/save_and_compare_cpu_vs_gpu.c)
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/save_and_compare_cpu_vs_gpu.c (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/save_and_compare_cpu_vs_gpu.c 2011-10-06 03:31:24 UTC (rev 19027)
@@ -0,0 +1,274 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <errno.h>
+#include <mpi.h>
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+
+void save_to_max_surface_file_(float* maxval) {
+ int rank;
+ char filename[BUFSIZ];
+ FILE* fp;
+ MPI_Comm_rank(MPI_COMM_WORLD,&rank);
+ sprintf(filename,"maxval_surface_proc_%03d.dat",rank);
+ fp = fopen(filename,"a+");
+ fprintf(fp,"%e\n",*maxval);
+ fclose(fp);
+}
+
+void save_fvector_(float* vector, int* size, int* id, int* cpu_or_gpu) {
+ FILE* fp;
+ char filename[BUFSIZ];
+ if(*cpu_or_gpu == 0) {
+ sprintf(filename, "debug_output_cpu_%d.dat",*id);
+ }
+ else {
+ sprintf(filename, "debug_output_gpu_%d.dat",*id);
+ }
+ fp = fopen(filename, "wb");
+ printf("writing vector, vector[0]=%e\n",vector[0]);
+ fwrite(vector, sizeof(float), *size, fp);
+ fclose(fp);
+
+}
+
+void save_ivector_(int* vector, int* size, int* id, int* cpu_or_gpu) {
+ FILE* fp;
+ char filename[BUFSIZ];
+ if(*cpu_or_gpu == 0) {
+ sprintf(filename, "debug_output_cpu_%d.dat",*id);
+ }
+ else {
+ sprintf(filename, "debug_output_gpu_%d.dat",*id);
+ }
+ fp = fopen(filename, "wb");
+ fwrite(vector, sizeof(int), *size, fp);
+ fclose(fp);
+
+}
+
+
+void get_max_from_surface_file_(int* nodes_per_iterationf,int* NSTEP) {
+ int nodes_per_iteration = *nodes_per_iterationf;
+ char filename[BUFSIZ];
+ int procid;
+ MPI_Comm_rank(MPI_COMM_WORLD,&procid);
+ sprintf(filename,"/scratch/eiger/rietmann/SPECFEM3D_AIGLE/in_out_files/DATABASES_MPI/proc%06d_surface_movie",procid);
+
+ FILE* fp; int it;
+ printf("Opening %s for analysis\n",filename);
+ fp = fopen(filename,"rb");
+ char* errorstr;
+ if(fp == 0) {
+ errorstr = strerror(errno);
+ printf("FILE ERROR:%s\n",errorstr);
+ perror("file error\n");
+ exit(1);
+ }
+
+ float* vector = (float*)malloc(nodes_per_iteration*sizeof(float));
+ float max_val;
+ int i;
+ for(it=0;it<*NSTEP;it++) {
+ int pos = (sizeof(float)*nodes_per_iteration)*(it);
+ fseek(fp,pos,SEEK_SET);
+ fread(vector,sizeof(float),nodes_per_iteration,fp);
+ for(i=0;i<nodes_per_iteration;i++) {
+ max_val = MAX(max_val,vector[i]);
+ }
+ if(it % 500 == 0) {
+ printf("scanning it=%d\n",it);
+ }
+ }
+ printf("max_val=%e\n",max_val);
+}
+
+void compare_two_vectors_exact_(int* sizef,float* vector1,float* vector2,int* num_errors) {
+
+ int size = *sizef;
+ int i;
+ int error_count = 0;
+
+ for(i=0;i<size;++i) {
+ if(vector1[i] != vector2[i]) {
+ error_count++;
+ if(error_count < 10) {
+ printf("err[%d]: %e != %e\n",i,vector1[i],vector2[i]);
+ }
+ }
+ }
+ printf("**** Error Count: %d ****\n",error_count);
+ *num_errors = error_count;
+}
+
+void compare_two_vectors_(int* sizef,float* vector1,float* vector2,int* num_errors) {
+
+ int size = *sizef;
+ int i;
+ int error_count = 0;
+ for(i=0;i<size;++i) {
+ if(vector1[i] != 0) {
+ if( fabsf(vector1[i]-vector2[i])/vector1[i] > 0.01) {
+ if(fabsf(vector1[i]-vector2[i]) > 1e-20) {
+ error_count++;
+ if(error_count<10) {
+ printf("err[%d]: %e != %e\n",i,vector1[i],vector2[i]);
+ }
+ }
+ }
+ }
+ /* if(vector1[i] != vector2[i]) { */
+ /* if(fabsf(vector1[i]-vector2[i]) > 1e-25) { */
+ /* error_count++; */
+ /* if(error_count<50) { */
+ /* printf("err[%d]: %e != %e\n",i,vector1[i],vector2[i]); */
+ /* } */
+ /* } */
+ /* } */
+ }
+ printf("**** Error Count: %d ****\n",error_count);
+ *num_errors = error_count;
+}
+
+void compare_surface_files_(int* bytes_per_iteration, int* number_of_iterations) {
+
+ char* cpu_file = "/scratch/eiger/rietmann/SPECFEM3D/in_out_files/DATABASES_MPI/cpu_proc000001_surface_movie";
+ char* gpu_file = "/scratch/eiger/rietmann/SPECFEM3D/in_out_files/DATABASES_MPI/cpu_v2_proc000001_surface_movie";
+
+ FILE* fp_cpu;
+ fp_cpu = fopen(cpu_file,"rb");
+ char* errorstr;
+ if(fp_cpu == 0) {
+ errorstr = strerror(errno);
+ printf("CPU FILE ERROR:%s\n",errorstr);
+ perror("cpu file error\n");
+ }
+ FILE* fp_gpu;
+ fp_gpu = fopen(gpu_file,"rb");
+
+ if(fp_gpu == NULL) {
+ errorstr = strerror(errno);
+ printf("GPU FILE ERROR:%s\n",errorstr);
+ perror("gpu file error\n");
+ }
+
+ /* pause_for_debug(); */
+
+ float* gpu_vector = (float*)malloc(*bytes_per_iteration);
+ float* cpu_vector = (float*)malloc(*bytes_per_iteration);
+ int i,it,error_count=0;
+ for(it=0;it<*number_of_iterations;it++) {
+ int pos = (*bytes_per_iteration)*(it);
+
+ fseek(fp_cpu,pos,SEEK_SET);
+ fseek(fp_gpu,pos,SEEK_SET);
+
+ int number_of_nodes = *bytes_per_iteration/sizeof(float);
+ fread(cpu_vector,sizeof(float),number_of_nodes,fp_cpu);
+ fread(gpu_vector,sizeof(float),number_of_nodes,fp_gpu);
+ int size = number_of_nodes;
+ float gpu_min_val=10;
+ float gpu_max_val=0;
+ float cpu_min_val=10;
+ float cpu_max_val=0;
+ if(it<100) {
+ for(i=0;i<size;i++) {
+ if((fabs(cpu_vector[i] - gpu_vector[i])/(fabs(cpu_vector[i])+1e-31) > 0.01)) {
+ if(error_count < 30) printf("ERROR[%d]: %g != %g\n",i,cpu_vector[i], gpu_vector[i]);
+ if(cpu_vector[i] > 1e-30) error_count++;
+ }
+ if(gpu_vector[i]>gpu_max_val) gpu_max_val = gpu_vector[i];
+ if(gpu_vector[i]<gpu_min_val) gpu_min_val = gpu_vector[i];
+ if(cpu_vector[i]>cpu_max_val) cpu_max_val = cpu_vector[i];
+ if(cpu_vector[i]<cpu_min_val) cpu_min_val = cpu_vector[i];
+ }
+ printf("%d Total Errors\n",error_count);
+ printf("size:%d\n",size);
+ printf("GPU:[min/max]=%e/%e\n",gpu_min_val,gpu_max_val);
+ printf("CPU:[min/max]=%e/%e\n",cpu_min_val,cpu_max_val);
+ }
+ }
+ printf("End of Surface Compare\n");
+ exit(1);
+}
+
+
+void compare_fvector_(float* vector, int* size, int* id, int* cpu_or_gpu) {
+ FILE* fp;
+ char cmp_filename[BUFSIZ];
+ float* compare_vector = (float*)malloc(*size*sizeof(float));
+ if(*cpu_or_gpu == 0) { //swap gpu/cpu for compare
+ sprintf(cmp_filename, "debug_output_gpu_%d.dat",*id);
+ }
+ else {
+ sprintf(cmp_filename, "debug_output_cpu_%d.dat",*id);
+ }
+ fopen(cmp_filename, "rb");
+ /* read the values */
+ if((fp=fopen(cmp_filename, "rb"))==NULL) {
+ printf("Cannot open comparison file %s.\n",cmp_filename);
+ exit(1);
+ }
+ if(fread(compare_vector, sizeof(float), *size, fp) != *size) {
+ if(feof(fp))
+ printf("Premature end of file.");
+ else
+ printf("File read error.");
+ }
+
+ fclose(fp);
+
+ int i;
+ int error_count=0;
+ for(i=0;i<*size;i++) {
+ if((fabs(vector[i] - compare_vector[i])/vector[i] > 0.0001)) {
+ if(error_count < 30) {
+ printf("ERROR[%d]: %g != %g\n",i,compare_vector[i], vector[i]);
+ }
+ error_count++;
+ /* if(compare_vector[i] > 1e-30) error_count++; */
+ }
+ }
+ printf("%d Total Errors\n",error_count);
+ printf("size:%d\n",*size);
+ /* for(i=0;i<30;i++) { */
+ /* printf("val[%d]: %g != %g\n",i,compare_vector[i], vector[i]); */
+ /* /\* printf("error_check[%d]= %g\n",abs(vector[i] - compare_vector[i])/vector[i]); *\/ */
+ /* } */
+}
+
+void compare_ivector_(int* vector, int* size, int* id, int* cpu_or_gpu) {
+ FILE* fp;
+ char cmp_filename[BUFSIZ];
+ int* compare_vector = (int*)malloc(*size*sizeof(int));
+ if(*cpu_or_gpu == 0) { //swap gpu/cpu for compare
+ sprintf(cmp_filename, "debug_output_gpu_%d.dat",*id);
+ }
+ else {
+ sprintf(cmp_filename, "debug_output_cpu_%d.dat",*id);
+ }
+ fopen(cmp_filename, "rb");
+ /* read the values */
+ if((fp=fopen(cmp_filename, "rb"))==NULL) {
+ printf("Cannot open comparison file %s.\n",cmp_filename);
+ exit(1);
+ }
+ if(fread(compare_vector, sizeof(int), *size, fp) != *size) {
+ if(feof(fp))
+ printf("Premature end of file.");
+ else
+ printf("File read error.");
+ }
+
+ fclose(fp);
+
+ int i;
+ int error_count=0;
+ for(i=0;i<*size;i++) {
+ if((abs(vector[i] - compare_vector[i])/vector[i] > 0.01) && error_count < 30) {
+ printf("ERROR[%d]: %g != %g\n",i,compare_vector[i], vector[i]);
+ error_count++;
+ }
+ }
+ printf("%d Total Errors\n",error_count);
+}
Added: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/transfer_fields_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/transfer_fields_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/transfer_fields_cuda.cu 2011-10-06 03:31:24 UTC (rev 19027)
@@ -0,0 +1,365 @@
+#include <stdio.h>
+#include <cuda.h>
+#include <cublas.h>
+#include <mpi.h>
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+#include "prepare_constants_cuda.h"
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// Transfer functions
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// for ELASTIC simulations
+
+/* ----------------------------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_b_fields_to_device,
+ TRANSFER_B_FIELDS_TO_DEVICE)(int* size, float* b_displ, float* b_veloc, float* b_accel,
+ long* Mesh_pointer_f) {
+
+TRACE("transfer_b_fields_to_device_");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+ cudaMemcpy(mp->d_b_displ,b_displ,sizeof(float)*(*size),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_b_veloc,b_veloc,sizeof(float)*(*size),cudaMemcpyHostToDevice);
+ cudaMemcpy(mp->d_b_accel,b_accel,sizeof(float)*(*size),cudaMemcpyHostToDevice);
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_fields_to_device,
+ TRANSFER_FIELDS_TO_DEVICE)(int* size, float* displ, float* veloc, float* accel,long* Mesh_pointer_f) {
+
+TRACE("transfer_fields_to_device_");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_displ,displ,sizeof(float)*(*size),cudaMemcpyHostToDevice),3);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_veloc,veloc,sizeof(float)*(*size),cudaMemcpyHostToDevice),4);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_accel,accel,sizeof(float)*(*size),cudaMemcpyHostToDevice),5);
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_b_fields_from_device,
+ TRANSFER_B_FIELDS_FROM_DEVICE)(int* size, float* b_displ, float* b_veloc, float* b_accel,long* Mesh_pointer_f) {
+
+TRACE("transfer_b_fields_from_device_");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ cudaMemcpy(b_displ,mp->d_b_displ,sizeof(float)*(*size),cudaMemcpyDeviceToHost);
+ cudaMemcpy(b_veloc,mp->d_b_veloc,sizeof(float)*(*size),cudaMemcpyDeviceToHost);
+ cudaMemcpy(b_accel,mp->d_b_accel,sizeof(float)*(*size),cudaMemcpyDeviceToHost);
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_fields_from_device,
+ TRANSFER_FIELDS_FROM_DEVICE)(int* size, float* displ, float* veloc, float* accel,long* Mesh_pointer_f) {
+
+TRACE("transfer_fields_from_device_");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(displ,mp->d_displ,sizeof(float)*(*size),cudaMemcpyDeviceToHost),6);
+ print_CUDA_error_if_any(cudaMemcpy(veloc,mp->d_veloc,sizeof(float)*(*size),cudaMemcpyDeviceToHost),7);
+ print_CUDA_error_if_any(cudaMemcpy(accel,mp->d_accel,sizeof(float)*(*size),cudaMemcpyDeviceToHost),8);
+
+ // printf("Transfered Fields From Device\n");
+ // int procid;
+ // MPI_Comm_rank(MPI_COMM_WORLD,&procid);
+ // printf("Quick check of answer for p:%d in transfer_fields_from_device\n",procid);
+ // for(int i=0;i<5;i++) {
+ // printf("accel[%d]=%2.20e\n",i,accel[i]);
+ // }
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_accel_to_device,
+ TRNASFER_ACCEL_TO_DEVICE)(int* size, float* accel,long* Mesh_pointer_f) {
+
+TRACE("transfer_accel_to_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_accel,accel,sizeof(float)*(*size),cudaMemcpyHostToDevice),6);
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_accel_from_device,
+ TRANSFER_ACCEL_FROM_DEVICE)(int* size, float* accel,long* Mesh_pointer_f) {
+
+TRACE("transfer_accel_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(accel,mp->d_accel,sizeof(float)*(*size),cudaMemcpyDeviceToHost),6);
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_b_accel_from_device,
+ TRNASFER_B_ACCEL_FROM_DEVICE)(int* size, float* b_accel,long* Mesh_pointer_f) {
+
+TRACE("transfer_b_accel_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(b_accel,mp->d_b_accel,sizeof(float)*(*size),cudaMemcpyDeviceToHost),6);
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_sigma_from_device,
+ TRANSFER_SIGMA_FROM_DEVICE)(int* size, float* sigma_kl,long* Mesh_pointer_f) {
+
+TRACE("transfer_sigma_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(sigma_kl,mp->d_Sigma_kl,sizeof(float)*(*size),cudaMemcpyDeviceToHost),6);
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_b_displ_from_device,
+ TRANSFER_B_DISPL_FROM_DEVICE)(int* size, float* displ,long* Mesh_pointer_f) {
+
+TRACE("transfer_b_displ_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(displ,mp->d_displ,sizeof(float)*(*size),cudaMemcpyDeviceToHost),6);
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_displ_from_device,
+ TRANSFER_DISPL_FROM_DEVICE)(int* size, float* displ,long* Mesh_pointer_f) {
+
+TRACE("transfer_displ_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(displ,mp->d_displ,sizeof(float)*(*size),cudaMemcpyDeviceToHost),6);
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_compute_kernel_answers_from_device,
+ TRANSFER_COMPUTE_KERNEL_ANSWERS_FROM_DEVICE)(long* Mesh_pointer,
+ float* rho_kl,int* size_rho,
+ float* mu_kl, int* size_mu,
+ float* kappa_kl, int* size_kappa) {
+TRACE("transfer_compute_kernel_answers_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+ cudaMemcpy(rho_kl,mp->d_rho_kl,*size_rho*sizeof(float),cudaMemcpyDeviceToHost);
+ cudaMemcpy(mu_kl,mp->d_mu_kl,*size_mu*sizeof(float),cudaMemcpyDeviceToHost);
+ cudaMemcpy(kappa_kl,mp->d_kappa_kl,*size_kappa*sizeof(float),cudaMemcpyDeviceToHost);
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_compute_kernel_fields_from_device,
+ TRANSFER_COMPUTE_KERNEL_FIELDS_FROM_DEVICE)(long* Mesh_pointer,
+ float* accel, int* size_accel,
+ float* b_displ, int* size_b_displ,
+ float* epsilondev_xx,
+ float* epsilondev_yy,
+ float* epsilondev_xy,
+ float* epsilondev_xz,
+ float* epsilondev_yz,
+ int* size_epsilondev,
+ float* b_epsilondev_xx,
+ float* b_epsilondev_yy,
+ float* b_epsilondev_xy,
+ float* b_epsilondev_xz,
+ float* b_epsilondev_yz,
+ int* size_b_epsilondev,
+ float* rho_kl,int* size_rho,
+ float* mu_kl, int* size_mu,
+ float* kappa_kl, int* size_kappa,
+ float* epsilon_trace_over_3,
+ float* b_epsilon_trace_over_3,
+ int* size_epsilon_trace_over_3) {
+TRACE("transfer_compute_kernel_fields_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+ cudaMemcpy(accel,mp->d_accel,*size_accel*sizeof(float),cudaMemcpyDeviceToHost);
+ cudaMemcpy(b_displ,mp->d_b_displ,*size_b_displ*sizeof(float),cudaMemcpyDeviceToHost);
+ cudaMemcpy(epsilondev_xx,mp->d_epsilondev_xx,*size_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
+ cudaMemcpy(epsilondev_yy,mp->d_epsilondev_yy,*size_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
+ cudaMemcpy(epsilondev_xy,mp->d_epsilondev_xy,*size_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
+ cudaMemcpy(epsilondev_xz,mp->d_epsilondev_xz,*size_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
+ cudaMemcpy(epsilondev_yz,mp->d_epsilondev_yz,*size_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
+ cudaMemcpy(b_epsilondev_xx,mp->d_b_epsilondev_xx,*size_b_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
+ cudaMemcpy(b_epsilondev_yy,mp->d_b_epsilondev_yy,*size_b_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
+ cudaMemcpy(b_epsilondev_xy,mp->d_b_epsilondev_xy,*size_b_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
+ cudaMemcpy(b_epsilondev_xz,mp->d_b_epsilondev_xz,*size_b_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
+ cudaMemcpy(b_epsilondev_yz,mp->d_b_epsilondev_yz,*size_b_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
+ cudaMemcpy(rho_kl,mp->d_rho_kl,*size_rho*sizeof(float),cudaMemcpyDeviceToHost);
+ cudaMemcpy(mu_kl,mp->d_mu_kl,*size_mu*sizeof(float),cudaMemcpyDeviceToHost);
+ cudaMemcpy(kappa_kl,mp->d_kappa_kl,*size_kappa*sizeof(float),cudaMemcpyDeviceToHost);
+ cudaMemcpy(epsilon_trace_over_3,mp->d_epsilon_trace_over_3,*size_epsilon_trace_over_3*sizeof(float),
+ cudaMemcpyDeviceToHost);
+ cudaMemcpy(b_epsilon_trace_over_3,mp->d_b_epsilon_trace_over_3,*size_epsilon_trace_over_3*sizeof(float),
+ cudaMemcpyDeviceToHost);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("after transfer_compute_kernel_fields_from_device");
+#endif
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// for ACOUSTIC simulations
+
+/* ----------------------------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+extern "C"
+void FC_FUNC_(transfer_fields_acoustic_to_device,
+ TRANSFER_FIELDS_ACOUSTIC_TO_DEVICE)(
+ int* size,
+ float* potential_acoustic,
+ float* potential_dot_acoustic,
+ float* potential_dot_dot_acoustic,
+ long* Mesh_pointer_f) {
+TRACE("transfer_fields_acoustic_to_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_potential_acoustic,potential_acoustic,
+ sizeof(float)*(*size),cudaMemcpyHostToDevice),110);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_potential_dot_acoustic,potential_dot_acoustic,
+ sizeof(float)*(*size),cudaMemcpyHostToDevice),120);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_potential_dot_dot_acoustic,potential_dot_dot_acoustic,
+ sizeof(float)*(*size),cudaMemcpyHostToDevice),130);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("after transfer_fields_acoustic_to_device");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_b_fields_acoustic_to_device,
+ TRANSFER_B_FIELDS_ACOUSTIC_TO_DEVICE)(
+ int* size,
+ float* b_potential_acoustic,
+ float* b_potential_dot_acoustic,
+ float* b_potential_dot_dot_acoustic,
+ long* Mesh_pointer_f) {
+TRACE("transfer_b_fields_acoustic_to_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_potential_acoustic,b_potential_acoustic,
+ sizeof(float)*(*size),cudaMemcpyHostToDevice),110);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_potential_dot_acoustic,b_potential_dot_acoustic,
+ sizeof(float)*(*size),cudaMemcpyHostToDevice),120);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_b_potential_dot_dot_acoustic,b_potential_dot_dot_acoustic,
+ sizeof(float)*(*size),cudaMemcpyHostToDevice),130);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("after transfer_b_fields_acoustic_to_device");
+#endif
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_fields_acoustic_from_device,TRANSFER_FIELDS_ACOUSTIC_FROM_DEVICE)(
+ int* size,
+ float* potential_acoustic,
+ float* potential_dot_acoustic,
+ float* potential_dot_dot_acoustic,
+ long* Mesh_pointer_f) {
+TRACE("transfer_fields_acoustic_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(potential_acoustic,mp->d_potential_acoustic,
+ sizeof(float)*(*size),cudaMemcpyDeviceToHost),111);
+ print_CUDA_error_if_any(cudaMemcpy(potential_dot_acoustic,mp->d_potential_dot_acoustic,
+ sizeof(float)*(*size),cudaMemcpyDeviceToHost),121);
+ print_CUDA_error_if_any(cudaMemcpy(potential_dot_dot_acoustic,mp->d_potential_dot_dot_acoustic,
+ sizeof(float)*(*size),cudaMemcpyDeviceToHost),131);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("after transfer_fields_acoustic_from_device");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_b_fields_acoustic_from_device,
+ TRANSFER_B_FIELDS_ACOUSTIC_FROM_DEVICE)(
+ int* size,
+ float* b_potential_acoustic,
+ float* b_potential_dot_acoustic,
+ float* b_potential_dot_dot_acoustic,
+ long* Mesh_pointer_f) {
+TRACE("transfer_b_fields_acoustic_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(b_potential_acoustic,mp->d_b_potential_acoustic,
+ sizeof(float)*(*size),cudaMemcpyDeviceToHost),111);
+ print_CUDA_error_if_any(cudaMemcpy(b_potential_dot_acoustic,mp->d_b_potential_dot_acoustic,
+ sizeof(float)*(*size),cudaMemcpyDeviceToHost),121);
+ print_CUDA_error_if_any(cudaMemcpy(b_potential_dot_dot_acoustic,mp->d_b_potential_dot_dot_acoustic,
+ sizeof(float)*(*size),cudaMemcpyDeviceToHost),131);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("after transfer_b_fields_acoustic_from_device");
+#endif
+}
+
+
Copied: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/write_seismograms_cuda.cu (from rev 19012, seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_seismograms_cuda.cu)
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/write_seismograms_cuda.cu (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/write_seismograms_cuda.cu 2011-10-06 03:31:24 UTC (rev 19027)
@@ -0,0 +1,318 @@
+#include <stdio.h>
+#include <cuda.h>
+#include <cublas.h>
+#include <mpi.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "config.h"
+#include "mesh_constants_cuda.h"
+
+//#define INDEX2(xsize,x,y) x + (y)*xsize
+//#define INDEX3(xsize,ysize,x,y,z) x + (y)*xsize + (z)*xsize*ysize
+//#define INDEX4(xsize,ysize,zsize,x,y,z,i) x + (y)*xsize + (z)*xsize*ysize + (i)*xsize*ysize*zsize
+//#define INDEX5(xsize,ysize,zsize,isize,x,y,z,i,j) x + (y)*xsize + (z)*xsize*ysize + (i)*xsize*ysize*zsize + (j)*xsize*ysize*zsize*isize
+//
+//#define ENABLE_VERY_SLOW_ERROR_CHECKING
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// ELASTIC simulations
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void transfer_stations_fields_from_device_kernel(int* number_receiver_global,
+ int* ispec_selected_rec,
+ int* ibool,
+ float* station_seismo_field,
+ float* desired_field,
+ int nrec_local,int* debug_index) {
+ int blockID = blockIdx.x + blockIdx.y*gridDim.x;
+ if(blockID<nrec_local) {
+ //int nodeID = threadIdx.x + blockID*blockDim.x;
+ int irec = number_receiver_global[blockID]-1;
+ int ispec = ispec_selected_rec[irec]-1; // ispec==0 before -1???
+ // if(threadIdx.x==1 && blockID < 125) {
+ // // debug_index[threadIdx.x] = threadIdx.x + 125*ispec;
+ // debug_index[blockID] = ispec;
+ // debug_index[blockID + 4] = irec;
+ // debug_index[blockID + 8] = ispec_selected_rec[0];
+ // debug_index[blockID + 9] = ispec_selected_rec[1];
+ // debug_index[blockID +10] = ispec_selected_rec[2];
+ // debug_index[blockID +11] = ispec_selected_rec[3];
+ // debug_index[blockID +12] = ispec_selected_rec[4];
+ // }
+ int iglob = ibool[threadIdx.x + 125*ispec]-1;
+ station_seismo_field[3*125*blockID + 3*threadIdx.x+0] = desired_field[3*iglob];
+ station_seismo_field[3*125*blockID + 3*threadIdx.x+1] = desired_field[3*iglob+1];
+ station_seismo_field[3*125*blockID + 3*threadIdx.x+2] = desired_field[3*iglob+2];
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+//extern "C" void pause_for_debuger(int);
+
+/* ----------------------------------------------------------------------------------------------- */
+
+void transfer_field_from_device(Mesh* mp, float* d_field,float* h_field,
+ int* number_receiver_global,
+ int* d_ispec_selected,
+ int* h_ispec_selected,
+ int* ibool) {
+
+ int blocksize = 125;
+ int num_blocks_x = mp->nrec_local;
+ int num_blocks_y = 1;
+ int myrank;
+ MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ int* d_debug_index;
+ //int* h_debug_index;
+ //cudaMalloc((void**)&d_debug_index,125*sizeof(int));
+ //h_debug_index = (int*)calloc(125,sizeof(int));
+ //cudaMemcpy(d_debug_index,h_debug_index,125*sizeof(int),cudaMemcpyHostToDevice);
+
+
+ // prepare field transfer array on device
+ transfer_stations_fields_from_device_kernel<<<grid,threads>>>(mp->d_number_receiver_global,
+ d_ispec_selected,
+ mp->d_ibool,
+ mp->d_station_seismo_field,
+ d_field,
+ mp->nrec_local,d_debug_index);
+
+ //cudaMemcpy(h_debug_index,d_debug_index,125*sizeof(int),cudaMemcpyDeviceToHost);
+
+ // pause_for_debug(1);
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("transfer_stations_fields_from_device_kernel");
+#endif
+
+ cudaMemcpy(mp->h_station_seismo_field,mp->d_station_seismo_field,
+ (3*125)*(mp->nrec_local)*sizeof(float),cudaMemcpyDeviceToHost);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("transfer_stations_fields_from_device_kernel_memcpy");
+#endif
+
+ // pause_for_debug(1);
+ int irec_local;
+
+ for(irec_local=0;irec_local<mp->nrec_local;irec_local++) {
+ int irec = number_receiver_global[irec_local]-1;
+ int ispec = h_ispec_selected[irec]-1;
+
+ for(int i=0;i<125;i++) {
+ int iglob = ibool[i+125*ispec]-1;
+ h_field[0+3*iglob] = mp->h_station_seismo_field[0+3*i+irec_local*125*3];
+ h_field[1+3*iglob] = mp->h_station_seismo_field[1+3*i+irec_local*125*3];
+ h_field[2+3*iglob] = mp->h_station_seismo_field[2+3*i+irec_local*125*3];
+ }
+
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_station_fields_from_device,
+ TRANSFER_STATION_FIELDS_FROM_DEVICE)(float* displ,float* veloc,float* accel,
+ float* b_displ, float* b_veloc, float* b_accel,
+ long* Mesh_pointer_f,int* number_receiver_global,
+ int* ispec_selected_rec,int* ispec_selected_source,
+ int* ibool,int* SIMULATION_TYPEf) {
+TRACE("transfer_station_fields_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
+
+
+ int SIMULATION_TYPE = *SIMULATION_TYPEf;
+
+ if(SIMULATION_TYPE == 1) {
+ transfer_field_from_device(mp,mp->d_displ,displ, number_receiver_global,
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ transfer_field_from_device(mp,mp->d_veloc,veloc, number_receiver_global,
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ transfer_field_from_device(mp,mp->d_accel,accel, number_receiver_global,
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ }
+ else if(SIMULATION_TYPE == 2) {
+ transfer_field_from_device(mp,mp->d_displ,displ, number_receiver_global,
+ mp->d_ispec_selected_source, ispec_selected_source, ibool);
+ transfer_field_from_device(mp,mp->d_veloc,veloc, number_receiver_global,
+ mp->d_ispec_selected_source, ispec_selected_source, ibool);
+ transfer_field_from_device(mp,mp->d_accel,accel, number_receiver_global,
+ mp->d_ispec_selected_source, ispec_selected_source, ibool);
+ }
+ else if(SIMULATION_TYPE == 3) {
+ transfer_field_from_device(mp,mp->d_b_displ,b_displ, number_receiver_global,
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ transfer_field_from_device(mp,mp->d_b_veloc,b_veloc, number_receiver_global,
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ transfer_field_from_device(mp,mp->d_b_accel,b_accel, number_receiver_global,
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ }
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// ACOUSTIC simulations
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void transfer_stations_fields_acoustic_from_device_kernel(int* number_receiver_global,
+ int* ispec_selected_rec,
+ int* ibool,
+ float* station_seismo_potential,
+ float* desired_potential) {
+
+ int blockID = blockIdx.x + blockIdx.y*gridDim.x;
+ int nodeID = threadIdx.x + blockID*blockDim.x;
+
+ int irec = number_receiver_global[blockID]-1;
+ int ispec = ispec_selected_rec[irec]-1;
+ int iglob = ibool[threadIdx.x + 125*ispec]-1;
+
+ //if(threadIdx.x == 0 ) printf("node acoustic: %i %i %i %i %i %e \n",blockID,nodeID,irec,ispec,iglob,desired_potential[iglob]);
+
+ station_seismo_potential[nodeID] = desired_potential[iglob];
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+void transfer_field_acoustic_from_device(Mesh* mp,
+ float* d_potential,
+ float* h_potential,
+ int* number_receiver_global,
+ int* d_ispec_selected,
+ int* h_ispec_selected,
+ int* ibool) {
+
+TRACE("transfer_field_acoustic_from_device");
+
+ int irec_local,irec,ispec,iglob,j;
+
+ // checks if anything to do
+ if( mp->nrec_local == 0 ) return;
+
+ // sets up kernel dimensions
+ int blocksize = 125;
+ int num_blocks_x = mp->nrec_local;
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = ceil(num_blocks_x/2.0);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ // prepare field transfer array on device
+ transfer_stations_fields_acoustic_from_device_kernel<<<grid,threads>>>(mp->d_number_receiver_global,
+ d_ispec_selected,
+ mp->d_ibool,
+ mp->d_station_seismo_potential,
+ d_potential);
+
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("transfer_stations_fields_acoustic_from_device_kernel");
+#endif
+
+ print_CUDA_error_if_any(cudaMemcpy(mp->h_station_seismo_potential,mp->d_station_seismo_potential,
+ mp->nrec_local*125*sizeof(float),cudaMemcpyDeviceToHost),500);
+
+ //printf("copy local receivers: %i \n",mp->nrec_local);
+
+ for(irec_local=0; irec_local < mp->nrec_local; irec_local++) {
+ irec = number_receiver_global[irec_local]-1;
+ ispec = h_ispec_selected[irec]-1;
+
+ // copy element values
+ // note: iglob may vary and can be irregularly accessing the h_potential array
+ for(j=0; j < 125; j++){
+ iglob = ibool[j+125*ispec]-1;
+ h_potential[iglob] = mp->h_station_seismo_potential[j+irec_local*125];
+ }
+
+ // copy each station element's points to working array
+ // note: this works if iglob values would be all aligned...
+ //memcpy(&(h_potential[iglob]),&(mp->h_station_seismo_potential[irec_local*125]),125*sizeof(float));
+
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_station_fields_acoustic_from_device,
+ TRANSFER_STATION_FIELDS_ACOUSTIC_FROM_DEVICE)(
+ float* potential_acoustic,
+ float* potential_dot_acoustic,
+ float* potential_dot_dot_acoustic,
+ float* b_potential_acoustic,
+ float* b_potential_dot_acoustic,
+ float* b_potential_dot_dot_acoustic,
+ long* Mesh_pointer_f,
+ int* number_receiver_global,
+ int* ispec_selected_rec,
+ int* ispec_selected_source,
+ int* ibool,
+ int* SIMULATION_TYPEf) {
+
+TRACE("transfer_station_fields_acoustic_from_device");
+ //double start_time = get_time();
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
+
+ int SIMULATION_TYPE = *SIMULATION_TYPEf;
+
+ if(SIMULATION_TYPE == 1) {
+ transfer_field_acoustic_from_device(mp,mp->d_potential_acoustic,potential_acoustic,
+ number_receiver_global,
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ transfer_field_acoustic_from_device(mp,mp->d_potential_dot_acoustic,potential_dot_acoustic,
+ number_receiver_global,
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ transfer_field_acoustic_from_device(mp,mp->d_potential_dot_dot_acoustic,potential_dot_dot_acoustic,
+ number_receiver_global,
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ }
+ else if(SIMULATION_TYPE == 2) {
+ transfer_field_acoustic_from_device(mp,mp->d_potential_acoustic,potential_acoustic,
+ number_receiver_global,
+ mp->d_ispec_selected_source, ispec_selected_source, ibool);
+ transfer_field_acoustic_from_device(mp,mp->d_potential_dot_acoustic,potential_dot_acoustic,
+ number_receiver_global,
+ mp->d_ispec_selected_source, ispec_selected_source, ibool);
+ transfer_field_acoustic_from_device(mp,mp->d_potential_dot_dot_acoustic,potential_dot_dot_acoustic,
+ number_receiver_global,
+ mp->d_ispec_selected_source, ispec_selected_source, ibool);
+ }
+ else if(SIMULATION_TYPE == 3) {
+ transfer_field_acoustic_from_device(mp,mp->d_b_potential_acoustic,b_potential_acoustic,
+ number_receiver_global,
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ transfer_field_acoustic_from_device(mp,mp->d_b_potential_dot_acoustic,b_potential_dot_acoustic,
+ number_receiver_global,
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ transfer_field_acoustic_from_device(mp,mp->d_b_potential_dot_dot_acoustic,b_potential_dot_dot_acoustic,
+ number_receiver_global,
+ mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ //double end_time = get_time();
+ //printf("Elapsed time: %e\n",end_time-start_time);
+#endif
+}
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/assemble_MPI_scalar.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/assemble_MPI_scalar.f90 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/assemble_MPI_scalar.f90 2011-10-06 03:31:24 UTC (rev 19027)
@@ -1,359 +1,358 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-!----
-!---- assemble the contributions between slices using non-blocking MPI
-!----
-
- subroutine assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,array_val, &
- num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
- my_neighbours_ext_mesh)
-
-! subroutine assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,array_val, &
-! buffer_send_scalar_ext_mesh,buffer_recv_scalar_ext_mesh, &
-! num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
-! nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
-! my_neighbours_ext_mesh, &
-! request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh)
-
- implicit none
-
- include "constants.h"
-
- integer :: NPROC
- integer :: NGLOB_AB
-
-! array to assemble
- real(kind=CUSTOM_REAL), dimension(NGLOB_AB) :: array_val
-
- integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
- integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh,my_neighbours_ext_mesh
- integer, dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
-
-! real(kind=CUSTOM_REAL), dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: &
-! buffer_send_scalar_ext_mesh,buffer_recv_scalar_ext_mesh
-! integer, dimension(num_interfaces_ext_mesh) :: request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh
-
- real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: buffer_send_scalar_ext_mesh
- real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: buffer_recv_scalar_ext_mesh
- integer, dimension(:), allocatable :: request_send_scalar_ext_mesh
- integer, dimension(:), allocatable :: request_recv_scalar_ext_mesh
-
-
- integer ipoin,iinterface,ier
-
-! here we have to assemble all the contributions between partitions using MPI
-
-! assemble only if more than one partition
- if(NPROC > 1) then
-
- allocate(buffer_send_scalar_ext_mesh(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier)
- if( ier /= 0 ) stop 'error allocating array buffer_send_scalar_ext_mesh'
- allocate(buffer_recv_scalar_ext_mesh(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier)
- if( ier /= 0 ) stop 'error allocating array buffer_recv_scalar_ext_mesh'
- allocate(request_send_scalar_ext_mesh(num_interfaces_ext_mesh),stat=ier)
- if( ier /= 0 ) stop 'error allocating array request_send_scalar_ext_mesh'
- allocate(request_recv_scalar_ext_mesh(num_interfaces_ext_mesh),stat=ier)
- if( ier /= 0 ) stop 'error allocating array request_recv_scalar_ext_mesh'
-
- ! partition border copy into the buffer
- do iinterface = 1, num_interfaces_ext_mesh
- do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
- buffer_send_scalar_ext_mesh(ipoin,iinterface) = array_val(ibool_interfaces_ext_mesh(ipoin,iinterface))
- enddo
- enddo
-
- ! send messages
- do iinterface = 1, num_interfaces_ext_mesh
- ! non-blocking synchronous send request
- call isend_cr(buffer_send_scalar_ext_mesh(1:nibool_interfaces_ext_mesh(iinterface),iinterface), &
- nibool_interfaces_ext_mesh(iinterface), &
- my_neighbours_ext_mesh(iinterface), &
- itag, &
- request_send_scalar_ext_mesh(iinterface) &
- )
- ! receive request
- call irecv_cr(buffer_recv_scalar_ext_mesh(1:nibool_interfaces_ext_mesh(iinterface),iinterface), &
- nibool_interfaces_ext_mesh(iinterface), &
- my_neighbours_ext_mesh(iinterface), &
- itag, &
- request_recv_scalar_ext_mesh(iinterface) &
- )
- enddo
-
- ! wait for communications completion (recv)
- do iinterface = 1, num_interfaces_ext_mesh
- call wait_req(request_recv_scalar_ext_mesh(iinterface))
- enddo
-
- ! adding contributions of neighbours
- do iinterface = 1, num_interfaces_ext_mesh
- do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
- array_val(ibool_interfaces_ext_mesh(ipoin,iinterface)) = &
- array_val(ibool_interfaces_ext_mesh(ipoin,iinterface)) + buffer_recv_scalar_ext_mesh(ipoin,iinterface)
- enddo
- enddo
-
- ! wait for communications completion (send)
- do iinterface = 1, num_interfaces_ext_mesh
- call wait_req(request_send_scalar_ext_mesh(iinterface))
- enddo
-
- deallocate(buffer_send_scalar_ext_mesh)
- deallocate(buffer_recv_scalar_ext_mesh)
- deallocate(request_send_scalar_ext_mesh)
- deallocate(request_recv_scalar_ext_mesh)
-
- endif
-
- end subroutine assemble_MPI_scalar_ext_mesh
-
-!
-!----
-!
-
- subroutine assemble_MPI_scalar_i_ext_mesh(NPROC,NGLOB_AB,array_val, &
- num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
- my_neighbours_ext_mesh)
-
- implicit none
-
- include "constants.h"
-
- integer :: NPROC
- integer :: NGLOB_AB
-
-! array to assemble
- integer, dimension(NGLOB_AB) :: array_val
-
- integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
- integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh,my_neighbours_ext_mesh
- integer, dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
-
- integer, dimension(:,:), allocatable :: buffer_send_scalar_ext_mesh
- integer, dimension(:,:), allocatable :: buffer_recv_scalar_ext_mesh
- integer, dimension(:), allocatable :: request_send_scalar_ext_mesh
- integer, dimension(:), allocatable :: request_recv_scalar_ext_mesh
-
- integer :: ipoin,iinterface,ier
-
-! here we have to assemble all the contributions between partitions using MPI
-
-! assemble only if more than one partition
- if(NPROC > 1) then
-
- allocate(buffer_send_scalar_ext_mesh(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier)
- if( ier /= 0 ) stop 'error allocating array buffer_send_scalar_ext_mesh'
- allocate(buffer_recv_scalar_ext_mesh(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier)
- if( ier /= 0 ) stop 'error allocating array buffer_recv_scalar_ext_mesh'
- allocate(request_send_scalar_ext_mesh(num_interfaces_ext_mesh),stat=ier)
- if( ier /= 0 ) stop 'error allocating array request_send_scalar_ext_mesh'
- allocate(request_recv_scalar_ext_mesh(num_interfaces_ext_mesh),stat=ier)
- if( ier /= 0 ) stop 'error allocating array request_recv_scalar_ext_mesh'
-
- ! partition border copy into the buffer
- do iinterface = 1, num_interfaces_ext_mesh
- do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
- buffer_send_scalar_ext_mesh(ipoin,iinterface) = &
- array_val(ibool_interfaces_ext_mesh(ipoin,iinterface))
- enddo
- enddo
-
- ! send messages
- do iinterface = 1, num_interfaces_ext_mesh
- ! non-blocking synchronous send request
- call isend_i(buffer_send_scalar_ext_mesh(1:nibool_interfaces_ext_mesh(iinterface),iinterface), &
- nibool_interfaces_ext_mesh(iinterface), &
- my_neighbours_ext_mesh(iinterface), &
- itag, &
- request_send_scalar_ext_mesh(iinterface) &
- )
- ! receive request
- call irecv_i(buffer_recv_scalar_ext_mesh(1:nibool_interfaces_ext_mesh(iinterface),iinterface), &
- nibool_interfaces_ext_mesh(iinterface), &
- my_neighbours_ext_mesh(iinterface), &
- itag, &
- request_recv_scalar_ext_mesh(iinterface) &
- )
- enddo
-
- ! wait for communications completion
- do iinterface = 1, num_interfaces_ext_mesh
- call wait_req(request_recv_scalar_ext_mesh(iinterface))
- enddo
-
- ! adding contributions of neighbours
- do iinterface = 1, num_interfaces_ext_mesh
- do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
- array_val(ibool_interfaces_ext_mesh(ipoin,iinterface)) = &
- array_val(ibool_interfaces_ext_mesh(ipoin,iinterface)) &
- + buffer_recv_scalar_ext_mesh(ipoin,iinterface)
- enddo
- enddo
-
- ! wait for communications completion (send)
- do iinterface = 1, num_interfaces_ext_mesh
- call wait_req(request_send_scalar_ext_mesh(iinterface))
- enddo
-
- deallocate(buffer_send_scalar_ext_mesh)
- deallocate(buffer_recv_scalar_ext_mesh)
- deallocate(request_send_scalar_ext_mesh)
- deallocate(request_recv_scalar_ext_mesh)
-
- endif
-
- end subroutine assemble_MPI_scalar_i_ext_mesh
-
-!
-!----
-!
-
- subroutine assemble_MPI_scalar_ext_mesh_s(NPROC,NGLOB_AB,array_val, &
- buffer_send_scalar_ext_mesh,buffer_recv_scalar_ext_mesh, &
- num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
- my_neighbours_ext_mesh, &
- request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh)
-
-! non-blocking MPI send
-
- implicit none
-
- include "constants.h"
-
- integer :: NPROC
- integer :: NGLOB_AB
- integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
-
-! array to send
- real(kind=CUSTOM_REAL), dimension(NGLOB_AB) :: array_val
-
-
- real(kind=CUSTOM_REAL), dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: &
- buffer_send_scalar_ext_mesh,buffer_recv_scalar_ext_mesh
-
- integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh,my_neighbours_ext_mesh
- integer, dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
- integer, dimension(num_interfaces_ext_mesh) :: request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh
-
- integer ipoin,iinterface
-
-! sends only if more than one partition
- if(NPROC > 1) then
-
- ! partition border copy into the buffer
- do iinterface = 1, num_interfaces_ext_mesh
- do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
- buffer_send_scalar_ext_mesh(ipoin,iinterface) = &
- array_val(ibool_interfaces_ext_mesh(ipoin,iinterface))
- enddo
- enddo
-
- ! send messages
- do iinterface = 1, num_interfaces_ext_mesh
- ! non-blocking synchronous send request
- call isend_cr(buffer_send_scalar_ext_mesh(1:nibool_interfaces_ext_mesh(iinterface),iinterface), &
- nibool_interfaces_ext_mesh(iinterface), &
- my_neighbours_ext_mesh(iinterface), &
- itag, &
- request_send_scalar_ext_mesh(iinterface) &
- )
- ! receive request
- call irecv_cr(buffer_recv_scalar_ext_mesh(1:nibool_interfaces_ext_mesh(iinterface),iinterface), &
- nibool_interfaces_ext_mesh(iinterface), &
- my_neighbours_ext_mesh(iinterface), &
- itag, &
- request_recv_scalar_ext_mesh(iinterface) &
- )
-
- enddo
-
- endif
-
- end subroutine assemble_MPI_scalar_ext_mesh_s
-
-!
-!----
-!
-
- subroutine assemble_MPI_scalar_ext_mesh_w(NPROC,NGLOB_AB,array_val, &
- buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh, &
- max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
- request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh)
-
-! waits for send/receiver to be completed and assembles contributions
-
- implicit none
-
- include "constants.h"
-
- integer :: NPROC
- integer :: NGLOB_AB
- integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
-! array to assemble
- real(kind=CUSTOM_REAL), dimension(NGLOB_AB) :: array_val
-
-
- real(kind=CUSTOM_REAL), dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: &
- buffer_recv_scalar_ext_mesh
-
- integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh
- integer, dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
- integer, dimension(num_interfaces_ext_mesh) :: request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh
-
- integer ipoin,iinterface
-
-! assemble only if more than one partition
- if(NPROC > 1) then
-
- ! wait for communications completion (recv)
- do iinterface = 1, num_interfaces_ext_mesh
- call wait_req(request_recv_scalar_ext_mesh(iinterface))
- enddo
-
- ! adding contributions of neighbours
- do iinterface = 1, num_interfaces_ext_mesh
- do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
- array_val(ibool_interfaces_ext_mesh(ipoin,iinterface)) = &
- array_val(ibool_interfaces_ext_mesh(ipoin,iinterface)) &
- + buffer_recv_scalar_ext_mesh(ipoin,iinterface)
- enddo
- enddo
-
- ! wait for communications completion (send)
- do iinterface = 1, num_interfaces_ext_mesh
- call wait_req(request_send_scalar_ext_mesh(iinterface))
- enddo
-
- endif
-
- end subroutine assemble_MPI_scalar_ext_mesh_w
-
-
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+!----
+!---- assemble the contributions between slices using non-blocking MPI
+!----
+
+ subroutine assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,array_val, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ my_neighbours_ext_mesh)
+
+! subroutine assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,array_val, &
+! buffer_send_scalar_ext_mesh,buffer_recv_scalar_ext_mesh, &
+! num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+! nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+! my_neighbours_ext_mesh, &
+! request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh)
+
+ implicit none
+
+ include "constants.h"
+
+ integer :: NPROC
+ integer :: NGLOB_AB
+
+! array to assemble
+ real(kind=CUSTOM_REAL), dimension(NGLOB_AB) :: array_val
+
+ integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
+ integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh,my_neighbours_ext_mesh
+ integer, dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
+
+! real(kind=CUSTOM_REAL), dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: &
+! buffer_send_scalar_ext_mesh,buffer_recv_scalar_ext_mesh
+! integer, dimension(num_interfaces_ext_mesh) :: request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh
+
+ real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: buffer_send_scalar_ext_mesh
+ real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: buffer_recv_scalar_ext_mesh
+ integer, dimension(:), allocatable :: request_send_scalar_ext_mesh
+ integer, dimension(:), allocatable :: request_recv_scalar_ext_mesh
+
+
+ integer ipoin,iinterface,ier
+
+! here we have to assemble all the contributions between partitions using MPI
+
+! assemble only if more than one partition
+ if(NPROC > 1) then
+
+ allocate(buffer_send_scalar_ext_mesh(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array buffer_send_scalar_ext_mesh'
+ allocate(buffer_recv_scalar_ext_mesh(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array buffer_recv_scalar_ext_mesh'
+ allocate(request_send_scalar_ext_mesh(num_interfaces_ext_mesh),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array request_send_scalar_ext_mesh'
+ allocate(request_recv_scalar_ext_mesh(num_interfaces_ext_mesh),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array request_recv_scalar_ext_mesh'
+
+ ! partition border copy into the buffer
+ do iinterface = 1, num_interfaces_ext_mesh
+ do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
+ buffer_send_scalar_ext_mesh(ipoin,iinterface) = array_val(ibool_interfaces_ext_mesh(ipoin,iinterface))
+ enddo
+ enddo
+
+ ! send messages
+ do iinterface = 1, num_interfaces_ext_mesh
+ ! non-blocking synchronous send request
+ call isend_cr(buffer_send_scalar_ext_mesh(1:nibool_interfaces_ext_mesh(iinterface),iinterface), &
+ nibool_interfaces_ext_mesh(iinterface), &
+ my_neighbours_ext_mesh(iinterface), &
+ itag, &
+ request_send_scalar_ext_mesh(iinterface) &
+ )
+ ! receive request
+ call irecv_cr(buffer_recv_scalar_ext_mesh(1:nibool_interfaces_ext_mesh(iinterface),iinterface), &
+ nibool_interfaces_ext_mesh(iinterface), &
+ my_neighbours_ext_mesh(iinterface), &
+ itag, &
+ request_recv_scalar_ext_mesh(iinterface) &
+ )
+ enddo
+
+ ! wait for communications completion (recv)
+ do iinterface = 1, num_interfaces_ext_mesh
+ call wait_req(request_recv_scalar_ext_mesh(iinterface))
+ enddo
+
+ ! adding contributions of neighbours
+ do iinterface = 1, num_interfaces_ext_mesh
+ do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
+ array_val(ibool_interfaces_ext_mesh(ipoin,iinterface)) = &
+ array_val(ibool_interfaces_ext_mesh(ipoin,iinterface)) + buffer_recv_scalar_ext_mesh(ipoin,iinterface)
+ enddo
+ enddo
+
+ ! wait for communications completion (send)
+ do iinterface = 1, num_interfaces_ext_mesh
+ call wait_req(request_send_scalar_ext_mesh(iinterface))
+ enddo
+
+ deallocate(buffer_send_scalar_ext_mesh)
+ deallocate(buffer_recv_scalar_ext_mesh)
+ deallocate(request_send_scalar_ext_mesh)
+ deallocate(request_recv_scalar_ext_mesh)
+
+ endif
+
+ end subroutine assemble_MPI_scalar_ext_mesh
+
+!
+!----
+!
+
+ subroutine assemble_MPI_scalar_i_ext_mesh(NPROC,NGLOB_AB,array_val, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ my_neighbours_ext_mesh)
+
+ implicit none
+
+ include "constants.h"
+
+ integer :: NPROC
+ integer :: NGLOB_AB
+
+! array to assemble
+ integer, dimension(NGLOB_AB) :: array_val
+
+ integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
+ integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh,my_neighbours_ext_mesh
+ integer, dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
+
+ integer, dimension(:,:), allocatable :: buffer_send_scalar_ext_mesh
+ integer, dimension(:,:), allocatable :: buffer_recv_scalar_ext_mesh
+ integer, dimension(:), allocatable :: request_send_scalar_ext_mesh
+ integer, dimension(:), allocatable :: request_recv_scalar_ext_mesh
+
+ integer :: ipoin,iinterface,ier
+
+! here we have to assemble all the contributions between partitions using MPI
+
+! assemble only if more than one partition
+ if(NPROC > 1) then
+
+ allocate(buffer_send_scalar_ext_mesh(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array buffer_send_scalar_ext_mesh'
+ allocate(buffer_recv_scalar_ext_mesh(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array buffer_recv_scalar_ext_mesh'
+ allocate(request_send_scalar_ext_mesh(num_interfaces_ext_mesh),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array request_send_scalar_ext_mesh'
+ allocate(request_recv_scalar_ext_mesh(num_interfaces_ext_mesh),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array request_recv_scalar_ext_mesh'
+
+ ! partition border copy into the buffer
+ do iinterface = 1, num_interfaces_ext_mesh
+ do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
+ buffer_send_scalar_ext_mesh(ipoin,iinterface) = &
+ array_val(ibool_interfaces_ext_mesh(ipoin,iinterface))
+ enddo
+ enddo
+
+ ! send messages
+ do iinterface = 1, num_interfaces_ext_mesh
+ ! non-blocking synchronous send request
+ call isend_i(buffer_send_scalar_ext_mesh(1:nibool_interfaces_ext_mesh(iinterface),iinterface), &
+ nibool_interfaces_ext_mesh(iinterface), &
+ my_neighbours_ext_mesh(iinterface), &
+ itag, &
+ request_send_scalar_ext_mesh(iinterface) &
+ )
+ ! receive request
+ call irecv_i(buffer_recv_scalar_ext_mesh(1:nibool_interfaces_ext_mesh(iinterface),iinterface), &
+ nibool_interfaces_ext_mesh(iinterface), &
+ my_neighbours_ext_mesh(iinterface), &
+ itag, &
+ request_recv_scalar_ext_mesh(iinterface) &
+ )
+ enddo
+
+ ! wait for communications completion
+ do iinterface = 1, num_interfaces_ext_mesh
+ call wait_req(request_recv_scalar_ext_mesh(iinterface))
+ enddo
+
+ ! adding contributions of neighbours
+ do iinterface = 1, num_interfaces_ext_mesh
+ do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
+ array_val(ibool_interfaces_ext_mesh(ipoin,iinterface)) = &
+ array_val(ibool_interfaces_ext_mesh(ipoin,iinterface)) &
+ + buffer_recv_scalar_ext_mesh(ipoin,iinterface)
+ enddo
+ enddo
+
+ ! wait for communications completion (send)
+ do iinterface = 1, num_interfaces_ext_mesh
+ call wait_req(request_send_scalar_ext_mesh(iinterface))
+ enddo
+
+ deallocate(buffer_send_scalar_ext_mesh)
+ deallocate(buffer_recv_scalar_ext_mesh)
+ deallocate(request_send_scalar_ext_mesh)
+ deallocate(request_recv_scalar_ext_mesh)
+
+ endif
+
+ end subroutine assemble_MPI_scalar_i_ext_mesh
+
+!
+!----
+!
+
+ subroutine assemble_MPI_scalar_ext_mesh_s(NPROC,NGLOB_AB,array_val, &
+ buffer_send_scalar_ext_mesh,buffer_recv_scalar_ext_mesh, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ my_neighbours_ext_mesh, &
+ request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh)
+
+! non-blocking MPI send
+
+ implicit none
+
+ include "constants.h"
+
+ integer :: NPROC
+ integer :: NGLOB_AB
+ integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
+
+! array to send
+ real(kind=CUSTOM_REAL), dimension(NGLOB_AB) :: array_val
+
+
+ real(kind=CUSTOM_REAL), dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: &
+ buffer_send_scalar_ext_mesh,buffer_recv_scalar_ext_mesh
+
+ integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh,my_neighbours_ext_mesh
+ integer, dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
+ integer, dimension(num_interfaces_ext_mesh) :: request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh
+
+ integer ipoin,iinterface
+
+! sends only if more than one partition
+ if(NPROC > 1) then
+
+ ! partition border copy into the buffer
+ do iinterface = 1, num_interfaces_ext_mesh
+ do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
+ buffer_send_scalar_ext_mesh(ipoin,iinterface) = &
+ array_val(ibool_interfaces_ext_mesh(ipoin,iinterface))
+ enddo
+ enddo
+
+ ! send messages
+ do iinterface = 1, num_interfaces_ext_mesh
+ ! non-blocking synchronous send request
+ call isend_cr(buffer_send_scalar_ext_mesh(1:nibool_interfaces_ext_mesh(iinterface),iinterface), &
+ nibool_interfaces_ext_mesh(iinterface), &
+ my_neighbours_ext_mesh(iinterface), &
+ itag, &
+ request_send_scalar_ext_mesh(iinterface) &
+ )
+ ! receive request
+ call irecv_cr(buffer_recv_scalar_ext_mesh(1:nibool_interfaces_ext_mesh(iinterface),iinterface), &
+ nibool_interfaces_ext_mesh(iinterface), &
+ my_neighbours_ext_mesh(iinterface), &
+ itag, &
+ request_recv_scalar_ext_mesh(iinterface) &
+ )
+
+ enddo
+
+ endif
+
+ end subroutine assemble_MPI_scalar_ext_mesh_s
+
+!
+!----
+!
+
+ subroutine assemble_MPI_scalar_ext_mesh_w(NPROC,NGLOB_AB,array_val, &
+ buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh, &
+ max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh)
+
+! waits for send/receiver to be completed and assembles contributions
+
+ implicit none
+
+ include "constants.h"
+
+ integer :: NPROC
+ integer :: NGLOB_AB
+ integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
+! array to assemble
+ real(kind=CUSTOM_REAL), dimension(NGLOB_AB) :: array_val
+
+
+ real(kind=CUSTOM_REAL), dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: &
+ buffer_recv_scalar_ext_mesh
+
+ integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh
+ integer, dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
+ integer, dimension(num_interfaces_ext_mesh) :: request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh
+
+ integer ipoin,iinterface
+
+! assemble only if more than one partition
+ if(NPROC > 1) then
+
+ ! wait for communications completion (recv)
+ do iinterface = 1, num_interfaces_ext_mesh
+ call wait_req(request_recv_scalar_ext_mesh(iinterface))
+ enddo
+
+ ! adding contributions of neighbours
+ do iinterface = 1, num_interfaces_ext_mesh
+ do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
+ array_val(ibool_interfaces_ext_mesh(ipoin,iinterface)) = &
+ array_val(ibool_interfaces_ext_mesh(ipoin,iinterface)) &
+ + buffer_recv_scalar_ext_mesh(ipoin,iinterface)
+ enddo
+ enddo
+
+ ! wait for communications completion (send)
+ do iinterface = 1, num_interfaces_ext_mesh
+ call wait_req(request_send_scalar_ext_mesh(iinterface))
+ enddo
+
+ endif
+
+ end subroutine assemble_MPI_scalar_ext_mesh_w
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/combine_vol_data.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/combine_vol_data.f90 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/combine_vol_data.f90 2011-10-06 03:31:24 UTC (rev 19027)
@@ -1,815 +1,949 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
- program combine_paraview_data_ext_mesh
-
-! puts the output of SPECFEM3D into '***.mesh' format,
-! which can be converted via mesh2vtu into ParaView format.
-!
-! for Paraview, see http://www.paraview.org for details
-!
-! combines the database files on several slices.
-! the local database file needs to have been collected onto the frontend (copy_local_database.pl)
-!
-! works for external, unregular meshes
-
- implicit none
-
- include 'constants.h'
-
- ! data must be of dimension: (NGLLX,NGLLY,NGLLZ,NSPEC_AB)
- double precision,dimension(:,:,:,:),allocatable :: data
- ! real array for data
- real,dimension(:,:,:,:),allocatable :: dat
-
- ! mesh coordinates
- real(kind=CUSTOM_REAL),dimension(:),allocatable :: xstore, ystore, zstore
- integer, dimension(:,:,:,:),allocatable :: ibool
-
- integer :: NSPEC_AB, NGLOB_AB
- integer :: numpoin
- integer :: i, ios, it, ier
- integer :: iproc, proc1, proc2, num_node, node_list(2000)
- integer :: np, ne, npp, nee, nelement, njunk
-
- character(len=256) :: sline, arg(6), filename, indir, outdir
- character(len=256) :: prname, prname_lp
- character(len=256) :: mesh_file,local_data_file
- logical :: HIGH_RESOLUTION_MESH
- integer :: ires
-
- ! for read_parameter_files
- double precision :: DT
- double precision :: HDUR_MOVIE
- integer :: NPROC,NTSTEP_BETWEEN_OUTPUT_SEISMOS,NSTEP, &
- UTM_PROJECTION_ZONE,SIMULATION_TYPE
- integer :: NSOURCES,NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY
- integer :: NTSTEP_BETWEEN_FRAMES,NTSTEP_BETWEEN_OUTPUT_INFO
- logical :: MOVIE_SURFACE,MOVIE_VOLUME,CREATE_SHAKEMAP,SAVE_DISPLACEMENT, &
- USE_HIGHRES_FOR_MOVIES,SUPPRESS_UTM_PROJECTION
- logical :: ATTENUATION,USE_OLSEN_ATTENUATION, &
- OCEANS,TOPOGRAPHY
- logical :: ABSORBING_CONDITIONS,SAVE_FORWARD
- logical :: ANISOTROPY,SAVE_MESH_FILES,PRINT_SOURCE_TIME_FUNCTION
- character(len=256) LOCAL_PATH
-
-! checks given arguments
- print *
- print *,'Recombining ParaView data for slices'
- print *
-
- do i = 1, 6
- call getarg(i,arg(i))
- if (i < 6 .and. trim(arg(i)) == '') then
- print *, 'Usage: '
- print *, ' xcombine_data start_slice end_slice filename input_dir output_dir high/low-resolution'
- print *, ' or '
- print *, ' xcombine_data slice_list filename input_dir output_dir high/low-resolution'
- print *
- print *, ' possible filenames are '
- print *, ' rho_vp, rho_vs, kappastore, mustore, alpha_kernel, etc'
- print *
- print *, ' that are stored in the local directory as real(kind=CUSTOM_REAL) filename(NGLLX,NGLLY,NGLLZ,NSPEC_AB) '
- print *, ' in filename.bin'
- print *
- print *, ' files have been collected in input_dir, output mesh file goes to output_dir '
- print *, ' give 0 for low resolution and 1 for high resolution'
- print *
- stop ' Reenter command line options'
- endif
- enddo
-
-! get slice list
- if (trim(arg(6)) == '') then
- num_node = 0
- open(unit = 20, file = trim(arg(1)), status = 'unknown',iostat = ios)
- if (ios /= 0) then
- print *,'Error opening ',trim(arg(1))
- stop
- endif
- do while ( 1 == 1)
- read(20,'(a)',iostat=ios) sline
- if (ios /= 0) exit
- read(sline,*,iostat=ios) njunk
- if (ios /= 0) exit
- num_node = num_node + 1
- node_list(num_node) = njunk
- enddo
- close(20)
- filename = arg(2)
- indir= arg(3)
- outdir = arg(4)
- read(arg(5),*) ires
- else
- read(arg(1),*) proc1
- read(arg(2),*) proc2
- do iproc = proc1, proc2
- node_list(iproc - proc1 + 1) = iproc
- enddo
- num_node = proc2 - proc1 + 1
- filename = arg(3)
- indir = arg(4)
- outdir = arg(5)
- read(arg(6),*) ires
- endif
-
- if (ires == 0) then
- HIGH_RESOLUTION_MESH = .false.
- else
- HIGH_RESOLUTION_MESH = .true.
- endif
-
- ! needs local_path for mesh files
- call read_parameter_file( NPROC,NTSTEP_BETWEEN_OUTPUT_SEISMOS,NSTEP,DT, &
- UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION, &
- ATTENUATION,USE_OLSEN_ATTENUATION,LOCAL_PATH,NSOURCES, &
- OCEANS,TOPOGRAPHY,ANISOTROPY,ABSORBING_CONDITIONS, &
- MOVIE_SURFACE,MOVIE_VOLUME,CREATE_SHAKEMAP,SAVE_DISPLACEMENT, &
- NTSTEP_BETWEEN_FRAMES,USE_HIGHRES_FOR_MOVIES,HDUR_MOVIE, &
- SAVE_MESH_FILES,PRINT_SOURCE_TIME_FUNCTION, &
- NTSTEP_BETWEEN_OUTPUT_INFO,SIMULATION_TYPE,SAVE_FORWARD, &
- NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY)
-
- print *, 'Slice list: '
- print *, node_list(1:num_node)
-
- ! open paraview output mesh file
- mesh_file = trim(outdir) // '/' // trim(filename)//'.mesh'
- call open_file(trim(mesh_file)//char(0))
-
- ! counts total number of points (all slices)
- npp = 0
- nee = 0
- call cvd_count_totals_ext_mesh(num_node,node_list,LOCAL_PATH,&
- npp,nee,HIGH_RESOLUTION_MESH)
-
-
- ! writes point and scalar information
- ! loops over slices (process partitions)
- np = 0
- do it = 1, num_node
-
- iproc = node_list(it)
-
- print *, ' '
- print *, 'Reading slice ', iproc
-
- ! gets number of elements and global points for this partition
- write(prname_lp,'(a,i6.6,a)') trim(LOCAL_PATH)//'/proc',iproc,'_'
- open(unit=27,file=prname_lp(1:len_trim(prname_lp))//'external_mesh.bin',&
- status='old',action='read',form='unformatted',iostat=ios)
- read(27) NSPEC_AB
- read(27) NGLOB_AB
-
- ! ibool file
- allocate(ibool(NGLLX,NGLLY,NGLLZ,NSPEC_AB),stat=ier)
- if( ier /= 0 ) stop 'error allocating array ibool'
- read(27) ibool
-
- ! global point arrays
- allocate(xstore(NGLOB_AB),ystore(NGLOB_AB),zstore(NGLOB_AB),stat=ier)
- if( ier /= 0 ) stop 'error allocating array xstore etc.'
- read(27) xstore
- read(27) ystore
- read(27) zstore
- close(27)
-
-
- ! data file
- write(prname,'(a,i6.6,a)') trim(indir)//'proc',iproc,'_'
- local_data_file = trim(prname) // trim(filename) // '.bin'
- open(unit = 28,file = trim(local_data_file),status='old',&
- action='read',form ='unformatted',iostat=ios)
- if (ios /= 0) then
- print *,'Error opening ',trim(local_data_file)
- stop
- endif
-
- allocate(dat(NGLLX,NGLLY,NGLLZ,NSPEC_AB),stat=ier)
- if( ier /= 0 ) stop 'error allocating dat array'
-
- ! uses conversion to real values
- if( CUSTOM_REAL == SIZE_DOUBLE ) then
- allocate(data(NGLLX,NGLLY,NGLLZ,NSPEC_AB),stat=ier)
- if( ier /= 0 ) stop 'error allocating data array'
- read(28) data
- dat = sngl(data)
- deallocate(data)
- else
- read(28) dat
- endif
- close(28)
- print *, trim(local_data_file)
-
- ! writes point coordinates and scalar value to mesh file
- if (.not. HIGH_RESOLUTION_MESH) then
- ! writes out element corners only
- call cvd_write_corners(NSPEC_AB,NGLOB_AB,ibool,xstore,ystore,zstore,dat, &
- it,npp,numpoin)
- else
- ! high resolution, all GLL points
- call cvd_write_GLL_points(NSPEC_AB,NGLOB_AB,ibool,xstore,ystore,zstore,dat,&
- it,npp,numpoin)
- endif
-
- print*,' points:',np,numpoin
-
- ! stores total number of points written
- np = np + numpoin
-
- ! cleans up memory allocations
- deallocate(ibool,dat,xstore,ystore,zstore)
-
- enddo ! all slices for points
-
- if (np /= npp) stop 'Error: Number of total points are not consistent'
- print *, 'Total number of points: ', np
- print *, ' '
-
-
-! writes element information
- ne = 0
- np = 0
- do it = 1, num_node
-
- iproc = node_list(it)
-
- print *, 'Reading slice ', iproc
- write(prname_lp,'(a,i6.6,a)') trim(LOCAL_PATH)//'/proc',iproc,'_'
-
- ! gets number of elements and global points for this partition
- open(unit=27,file=prname_lp(1:len_trim(prname_lp))//'external_mesh.bin',&
- status='old',action='read',form='unformatted')
- read(27) NSPEC_AB
- read(27) NGLOB_AB
-
- ! ibool file
- allocate(ibool(NGLLX,NGLLY,NGLLZ,NSPEC_AB),stat=ier)
- if( ier /= 0 ) stop 'error allocating array ibool'
- read(27) ibool
- close(27)
-
- ! writes out element corner indices
- if (.not. HIGH_RESOLUTION_MESH) then
- ! spectral elements
- call cvd_write_corner_elements(NSPEC_AB,NGLOB_AB,ibool, &
- np,nelement,it,nee,numpoin)
- else
- ! subdivided spectral elements
- call cvd_write_GLL_elements(NSPEC_AB,NGLOB_AB,ibool, &
- np,nelement,it,nee,numpoin)
- endif
-
- print*,' elements:',ne,nelement
- print*,' points : ',np,numpoin
-
- ne = ne + nelement
-
- deallocate(ibool)
-
- enddo ! num_node
-
- ! checks with total number of elements
- if (ne /= nee) then
- print*,'error: number of elements counted:',ne,'total:',nee
- stop 'Number of total elements are not consistent'
- endif
- print *, 'Total number of elements: ', ne
-
- ! close mesh file
- call close_file()
-
- print *, 'Done writing '//trim(mesh_file)
-
- end program combine_paraview_data_ext_mesh
-
-
-!=============================================================
-
-
- subroutine cvd_count_totals_ext_mesh(num_node,node_list,LOCAL_PATH,&
- npp,nee,HIGH_RESOLUTION_MESH)
-
-! counts total number of points and elements for external meshes in given slice list
-! returns: total number of elements (nee) and number of points (npp)
-
- implicit none
- include 'constants.h'
-
- integer,intent(in) :: num_node,node_list(300)
- character(len=256),intent(in) :: LOCAL_PATH
- integer,intent(out) :: npp,nee
- logical,intent(in) :: HIGH_RESOLUTION_MESH
-
- ! local parameters
- integer, dimension(:,:,:,:),allocatable :: ibool
- logical, dimension(:),allocatable :: mask_ibool
- integer :: NSPEC_AB, NGLOB_AB
- integer :: it,iproc,npoint,nelement,ios,ispec,ier
- integer :: iglob1, iglob2, iglob3, iglob4, iglob5, iglob6, iglob7, iglob8
- character(len=256) :: prname_lp
-
-
- ! loops over all slices (process partitions)
- npp = 0
- nee = 0
- do it = 1, num_node
-
- ! gets number of elements and points for this slice
- iproc = node_list(it)
- write(prname_lp,'(a,i6.6,a)') trim(LOCAL_PATH)//'/proc',iproc,'_'
- open(unit=27,file=prname_lp(1:len_trim(prname_lp))//'external_mesh.bin',&
- status='old',action='read',form='unformatted',iostat=ios)
- if (ios /= 0) then
- print *,'Error opening: ',prname_lp(1:len_trim(prname_lp))//'external_mesh.bin'
- stop
- endif
-
- read(27) NSPEC_AB
- read(27) NGLOB_AB
- ! gets ibool
- if( .not. HIGH_RESOLUTION_MESH ) then
- allocate(ibool(NGLLX,NGLLY,NGLLZ,NSPEC_AB),stat=ier)
- if( ier /= 0 ) stop 'error allocating array ibool'
- read(27) ibool
- endif
- close(27)
-
- ! calculates totals
- if( HIGH_RESOLUTION_MESH ) then
- ! total number of global points
- npp = npp + NGLOB_AB
-
- ! total number of elements
- ! each spectral elements gets subdivided by GLL points,
- ! which form (NGLLX-1)*(NGLLY-1)*(NGLLZ-1) sub-elements
- nelement = NSPEC_AB * (NGLLX-1) * (NGLLY-1) * (NGLLZ-1)
- nee = nee + nelement
-
- else
-
- ! mark element corners (global AVS or DX points)
- allocate(mask_ibool(NGLOB_AB),stat=ier)
- if( ier /= 0 ) stop 'error allocating array mask_ibool'
- mask_ibool = .false.
- do ispec=1,NSPEC_AB
- iglob1=ibool(1,1,1,ispec)
- iglob2=ibool(NGLLX,1,1,ispec)
- iglob3=ibool(NGLLX,NGLLY,1,ispec)
- iglob4=ibool(1,NGLLY,1,ispec)
- iglob5=ibool(1,1,NGLLZ,ispec)
- iglob6=ibool(NGLLX,1,NGLLZ,ispec)
- iglob7=ibool(NGLLX,NGLLY,NGLLZ,ispec)
- iglob8=ibool(1,NGLLY,NGLLZ,ispec)
- mask_ibool(iglob1) = .true.
- mask_ibool(iglob2) = .true.
- mask_ibool(iglob3) = .true.
- mask_ibool(iglob4) = .true.
- mask_ibool(iglob5) = .true.
- mask_ibool(iglob6) = .true.
- mask_ibool(iglob7) = .true.
- mask_ibool(iglob8) = .true.
- enddo
-
- ! count global number of AVS or DX points
- npoint = count(mask_ibool(:))
- npp = npp + npoint
-
- ! total number of spectral elements
- nee = nee + NSPEC_AB
-
- endif ! HIGH_RESOLUTION_MESH
-
- ! frees arrays
- if( allocated(mask_ibool) ) deallocate( mask_ibool)
- if( allocated(ibool) ) deallocate(ibool)
-
- enddo
-
- end subroutine cvd_count_totals_ext_mesh
-
-!=============================================================
-
-
- subroutine cvd_write_corners(NSPEC_AB,NGLOB_AB,ibool,xstore,ystore,zstore,dat,&
- it,npp,numpoin)
-
-! writes out locations of spectral element corners only
-
- implicit none
- include 'constants.h'
-
- integer,intent(in) :: NSPEC_AB,NGLOB_AB
- integer,dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB),intent(in) :: ibool
- real(kind=CUSTOM_REAL),dimension(NGLOB_AB) :: xstore, ystore, zstore
- real,dimension(NGLLY,NGLLY,NGLLZ,NSPEC_AB),intent(in) :: dat
- integer:: it
- integer :: npp,numpoin
-
- ! local parameters
- logical,dimension(:),allocatable :: mask_ibool
- real :: x, y, z
- integer :: ispec,ier
- integer :: iglob1, iglob2, iglob3, iglob4, iglob5, iglob6, iglob7, iglob8
-
- ! writes out total number of points
- if (it == 1) then
- call write_integer(npp)
- endif
-
- ! writes our corner point locations
- allocate(mask_ibool(NGLOB_AB),stat=ier)
- if( ier /= 0 ) stop 'error allocating array mask_ibool'
- mask_ibool(:) = .false.
- numpoin = 0
- do ispec=1,NSPEC_AB
- iglob1=ibool(1,1,1,ispec)
- iglob2=ibool(NGLLX,1,1,ispec)
- iglob3=ibool(NGLLX,NGLLY,1,ispec)
- iglob4=ibool(1,NGLLY,1,ispec)
- iglob5=ibool(1,1,NGLLZ,ispec)
- iglob6=ibool(NGLLX,1,NGLLZ,ispec)
- iglob7=ibool(NGLLX,NGLLY,NGLLZ,ispec)
- iglob8=ibool(1,NGLLY,NGLLZ,ispec)
-
- if(.not. mask_ibool(iglob1)) then
- numpoin = numpoin + 1
- x = xstore(iglob1)
- y = ystore(iglob1)
- z = zstore(iglob1)
- call write_real(x)
- call write_real(y)
- call write_real(z)
- call write_real(dat(1,1,1,ispec))
- mask_ibool(iglob1) = .true.
- endif
- if(.not. mask_ibool(iglob2)) then
- numpoin = numpoin + 1
- x = xstore(iglob2)
- y = ystore(iglob2)
- z = zstore(iglob2)
- call write_real(x)
- call write_real(y)
- call write_real(z)
- call write_real(dat(NGLLX,1,1,ispec))
- mask_ibool(iglob2) = .true.
- endif
- if(.not. mask_ibool(iglob3)) then
- numpoin = numpoin + 1
- x = xstore(iglob3)
- y = ystore(iglob3)
- z = zstore(iglob3)
- call write_real(x)
- call write_real(y)
- call write_real(z)
- call write_real(dat(NGLLX,NGLLY,1,ispec))
- mask_ibool(iglob3) = .true.
- endif
- if(.not. mask_ibool(iglob4)) then
- numpoin = numpoin + 1
- x = xstore(iglob4)
- y = ystore(iglob4)
- z = zstore(iglob4)
- call write_real(x)
- call write_real(y)
- call write_real(z)
- call write_real(dat(1,NGLLY,1,ispec))
- mask_ibool(iglob4) = .true.
- endif
- if(.not. mask_ibool(iglob5)) then
- numpoin = numpoin + 1
- x = xstore(iglob5)
- y = ystore(iglob5)
- z = zstore(iglob5)
- call write_real(x)
- call write_real(y)
- call write_real(z)
- call write_real(dat(1,1,NGLLZ,ispec))
- mask_ibool(iglob5) = .true.
- endif
- if(.not. mask_ibool(iglob6)) then
- numpoin = numpoin + 1
- x = xstore(iglob6)
- y = ystore(iglob6)
- z = zstore(iglob6)
- call write_real(x)
- call write_real(y)
- call write_real(z)
- call write_real(dat(NGLLX,1,NGLLZ,ispec))
- mask_ibool(iglob6) = .true.
- endif
- if(.not. mask_ibool(iglob7)) then
- numpoin = numpoin + 1
- x = xstore(iglob7)
- y = ystore(iglob7)
- z = zstore(iglob7)
- call write_real(x)
- call write_real(y)
- call write_real(z)
- call write_real(dat(NGLLX,NGLLY,NGLLZ,ispec))
- mask_ibool(iglob7) = .true.
- endif
- if(.not. mask_ibool(iglob8)) then
- numpoin = numpoin + 1
- x = xstore(iglob8)
- y = ystore(iglob8)
- z = zstore(iglob8)
- call write_real(x)
- call write_real(y)
- call write_real(z)
- call write_real(dat(1,NGLLY,NGLLZ,ispec))
- mask_ibool(iglob8) = .true.
- endif
- enddo ! ispec
-
- end subroutine cvd_write_corners
-
-
-!=============================================================
-
-
- subroutine cvd_write_GLL_points(NSPEC_AB,NGLOB_AB,ibool,xstore,ystore,zstore,dat,&
- it,npp,numpoin)
-
-! writes out locations of all GLL points of spectral elements
-
- implicit none
- include 'constants.h'
-
- integer,intent(in) :: NSPEC_AB,NGLOB_AB
- integer,dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB),intent(in) :: ibool
- real(kind=CUSTOM_REAL),dimension(NGLOB_AB) :: xstore, ystore, zstore
- real,dimension(NGLLY,NGLLY,NGLLZ,NSPEC_AB),intent(in) :: dat
- integer:: it,npp,numpoin
-
- ! local parameters
- logical,dimension(:),allocatable :: mask_ibool
- real :: x, y, z
- integer :: ispec,i,j,k,iglob,ier
-
- ! writes out total number of points
- if (it == 1) then
- call write_integer(npp)
- endif
-
- ! writes out point locations and values
- allocate(mask_ibool(NGLOB_AB),stat=ier)
- if( ier /= 0 ) stop 'error allocating array mask_ibool'
-
- mask_ibool(:) = .false.
- numpoin = 0
- do ispec=1,NSPEC_AB
- do k = 1, NGLLZ
- do j = 1, NGLLY
- do i = 1, NGLLX
- iglob = ibool(i,j,k,ispec)
- if(.not. mask_ibool(iglob)) then
- numpoin = numpoin + 1
- x = xstore(iglob)
- y = ystore(iglob)
- z = zstore(iglob)
- call write_real(x)
- call write_real(y)
- call write_real(z)
- call write_real(dat(i,j,k,ispec))
- mask_ibool(iglob) = .true.
- endif
- enddo ! i
- enddo ! j
- enddo ! k
- enddo !ispec
-
- end subroutine cvd_write_GLL_points
-
-!=============================================================
-
-! writes out locations of spectral element corners only
-
- subroutine cvd_write_corner_elements(NSPEC_AB,NGLOB_AB,ibool,&
- np,nelement,it,nee,numpoin)
-
- implicit none
- include 'constants.h'
-
- integer,intent(in) :: NSPEC_AB,NGLOB_AB
- integer,dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB),intent(in) :: ibool
- integer:: it,nee,np,nelement,numpoin
-
- ! local parameters
- logical,dimension(:),allocatable :: mask_ibool
- integer,dimension(:),allocatable :: num_ibool
- integer :: ispec,ier
- integer :: iglob1, iglob2, iglob3, iglob4, iglob5, iglob6, iglob7, iglob8
- integer :: n1, n2, n3, n4, n5, n6, n7, n8
-
- ! outputs total number of elements for all slices
- if (it == 1) then
- call write_integer(nee)
- end if
-
- ! writes out element indices
- allocate(mask_ibool(NGLOB_AB), &
- num_ibool(NGLOB_AB),stat=ier)
- if( ier /= 0 ) stop 'error allocating array mask_ibool'
-
- mask_ibool(:) = .false.
- num_ibool(:) = 0
- numpoin = 0
- do ispec=1,NSPEC_AB
- ! gets corner indices
- iglob1=ibool(1,1,1,ispec)
- iglob2=ibool(NGLLX,1,1,ispec)
- iglob3=ibool(NGLLX,NGLLY,1,ispec)
- iglob4=ibool(1,NGLLY,1,ispec)
- iglob5=ibool(1,1,NGLLZ,ispec)
- iglob6=ibool(NGLLX,1,NGLLZ,ispec)
- iglob7=ibool(NGLLX,NGLLY,NGLLZ,ispec)
- iglob8=ibool(1,NGLLY,NGLLZ,ispec)
-
- ! sets increasing numbering
- if(.not. mask_ibool(iglob1)) then
- numpoin = numpoin + 1
- num_ibool(iglob1) = numpoin
- mask_ibool(iglob1) = .true.
- endif
- if(.not. mask_ibool(iglob2)) then
- numpoin = numpoin + 1
- num_ibool(iglob2) = numpoin
- mask_ibool(iglob2) = .true.
- endif
- if(.not. mask_ibool(iglob3)) then
- numpoin = numpoin + 1
- num_ibool(iglob3) = numpoin
- mask_ibool(iglob3) = .true.
- endif
- if(.not. mask_ibool(iglob4)) then
- numpoin = numpoin + 1
- num_ibool(iglob4) = numpoin
- mask_ibool(iglob4) = .true.
- endif
- if(.not. mask_ibool(iglob5)) then
- numpoin = numpoin + 1
- num_ibool(iglob5) = numpoin
- mask_ibool(iglob5) = .true.
- endif
- if(.not. mask_ibool(iglob6)) then
- numpoin = numpoin + 1
- num_ibool(iglob6) = numpoin
- mask_ibool(iglob6) = .true.
- endif
- if(.not. mask_ibool(iglob7)) then
- numpoin = numpoin + 1
- num_ibool(iglob7) = numpoin
- mask_ibool(iglob7) = .true.
- endif
- if(.not. mask_ibool(iglob8)) then
- numpoin = numpoin + 1
- num_ibool(iglob8) = numpoin
- mask_ibool(iglob8) = .true.
- endif
-
- ! outputs corner indices (starting with 0 )
- n1 = num_ibool(iglob1) -1 + np
- n2 = num_ibool(iglob2) -1 + np
- n3 = num_ibool(iglob3) -1 + np
- n4 = num_ibool(iglob4) -1 + np
- n5 = num_ibool(iglob5) -1 + np
- n6 = num_ibool(iglob6) -1 + np
- n7 = num_ibool(iglob7) -1 + np
- n8 = num_ibool(iglob8) -1 + np
-
- call write_integer(n1)
- call write_integer(n2)
- call write_integer(n3)
- call write_integer(n4)
- call write_integer(n5)
- call write_integer(n6)
- call write_integer(n7)
- call write_integer(n8)
-
- enddo
-
- ! elements written
- nelement = NSPEC_AB
-
- ! updates points written
- np = np + numpoin
-
- end subroutine cvd_write_corner_elements
-
-
-!=============================================================
-
-
- subroutine cvd_write_GLL_elements(NSPEC_AB,NGLOB_AB,ibool, &
- np,nelement,it,nee,numpoin)
-
-! writes out indices of elements given by GLL points
-
- implicit none
- include 'constants.h'
-
- integer,intent(in):: NSPEC_AB,NGLOB_AB
- integer,dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB),intent(in) :: ibool
- integer:: it,nee,np,numpoin,nelement
-
- ! local parameters
- logical,dimension(:),allocatable :: mask_ibool
- integer,dimension(:),allocatable :: num_ibool
- integer :: ispec,i,j,k,ier
- integer :: iglob,iglob1, iglob2, iglob3, iglob4, iglob5, iglob6, iglob7, iglob8
- integer :: n1, n2, n3, n4, n5, n6, n7, n8
-
- ! outputs total number of elements for all slices
- if (it == 1) then
- !nee = nelement * num_node
- call write_integer(nee)
- endif
-
- ! sets numbering num_ibool respecting mask
- allocate(mask_ibool(NGLOB_AB), &
- num_ibool(NGLOB_AB),stat=ier)
- if( ier /= 0 ) stop 'error allocating array mask_ibool'
-
- mask_ibool(:) = .false.
- num_ibool(:) = 0
- numpoin = 0
- do ispec=1,NSPEC_AB
- do k = 1, NGLLZ
- do j = 1, NGLLY
- do i = 1, NGLLX
- iglob = ibool(i,j,k,ispec)
- if(.not. mask_ibool(iglob)) then
- numpoin = numpoin + 1
- num_ibool(iglob) = numpoin
- mask_ibool(iglob) = .true.
- endif
- enddo ! i
- enddo ! j
- enddo ! k
- enddo !ispec
-
- ! outputs GLL subelement
- do ispec = 1, NSPEC_AB
- do k = 1, NGLLZ-1
- do j = 1, NGLLY-1
- do i = 1, NGLLX-1
- iglob1 = ibool(i,j,k,ispec)
- iglob2 = ibool(i+1,j,k,ispec)
- iglob3 = ibool(i+1,j+1,k,ispec)
- iglob4 = ibool(i,j+1,k,ispec)
- iglob5 = ibool(i,j,k+1,ispec)
- iglob6 = ibool(i+1,j,k+1,ispec)
- iglob7 = ibool(i+1,j+1,k+1,ispec)
- iglob8 = ibool(i,j+1,k+1,ispec)
- n1 = num_ibool(iglob1)+np-1
- n2 = num_ibool(iglob2)+np-1
- n3 = num_ibool(iglob3)+np-1
- n4 = num_ibool(iglob4)+np-1
- n5 = num_ibool(iglob5)+np-1
- n6 = num_ibool(iglob6)+np-1
- n7 = num_ibool(iglob7)+np-1
- n8 = num_ibool(iglob8)+np-1
- call write_integer(n1)
- call write_integer(n2)
- call write_integer(n3)
- call write_integer(n4)
- call write_integer(n5)
- call write_integer(n6)
- call write_integer(n7)
- call write_integer(n8)
- enddo
- enddo
- enddo
- enddo
- ! elements written
- nelement = NSPEC_AB * (NGLLX-1) * (NGLLY-1) * (NGLLZ-1)
-
- ! updates points written
- np = np + numpoin
-
- end subroutine cvd_write_GLL_elements
-
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+ module vtk
+
+
+ !-------------------------------------------------------------
+ ! USER PARAMETER
+
+ ! outputs as VTK ASCII file
+ logical,parameter :: USE_VTK_OUTPUT = .true.
+
+ !-------------------------------------------------------------
+
+
+ ! global point data
+ real,dimension(:),allocatable :: total_dat
+
+ end module vtk
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ program combine_paraview_data_ext_mesh
+
+! puts the output of SPECFEM3D into '***.mesh' format,
+! which can be converted via mesh2vtu into ParaView format.
+!
+! for Paraview, see http://www.paraview.org for details
+!
+! combines the database files on several slices.
+! the local database file needs to have been collected onto the frontend (copy_local_database.pl)
+!
+! works for external, unregular meshes
+
+ use vtk
+ implicit none
+
+ include 'constants.h'
+
+ ! data must be of dimension: (NGLLX,NGLLY,NGLLZ,NSPEC_AB)
+ double precision,dimension(:,:,:,:),allocatable :: data
+ ! real array for data
+ real,dimension(:,:,:,:),allocatable :: dat
+
+ ! mesh coordinates
+ real(kind=CUSTOM_REAL),dimension(:),allocatable :: xstore, ystore, zstore
+ integer, dimension(:,:,:,:),allocatable :: ibool
+
+ integer :: NSPEC_AB, NGLOB_AB
+ integer :: numpoin
+ integer :: i, ios, it, ier
+ integer :: iproc, proc1, proc2, num_node, node_list(600)
+ integer :: np, ne, npp, nee, nelement, njunk
+
+ character(len=256) :: sline, arg(6), filename, indir, outdir
+ character(len=256) :: prname, prname_lp
+ character(len=256) :: mesh_file,local_data_file
+ logical :: HIGH_RESOLUTION_MESH
+ integer :: ires
+
+ ! for read_parameter_files
+ double precision :: DT
+ double precision :: HDUR_MOVIE
+ integer :: NPROC,NTSTEP_BETWEEN_OUTPUT_SEISMOS,NSTEP, &
+ UTM_PROJECTION_ZONE,SIMULATION_TYPE
+ integer :: NSOURCES,NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY
+ integer :: NTSTEP_BETWEEN_FRAMES,NTSTEP_BETWEEN_OUTPUT_INFO
+ logical :: MOVIE_SURFACE,MOVIE_VOLUME,CREATE_SHAKEMAP,SAVE_DISPLACEMENT, &
+ USE_HIGHRES_FOR_MOVIES,SUPPRESS_UTM_PROJECTION
+ logical :: ATTENUATION,USE_OLSEN_ATTENUATION, &
+ OCEANS,TOPOGRAPHY
+ logical :: ABSORBING_CONDITIONS,SAVE_FORWARD
+ logical :: ANISOTROPY,SAVE_MESH_FILES,PRINT_SOURCE_TIME_FUNCTION
+ character(len=256) LOCAL_PATH
+
+! checks given arguments
+ print *
+ print *,'Recombining ParaView data for slices'
+ print *
+
+ do i = 1, 6
+ call getarg(i,arg(i))
+ if (i < 6 .and. trim(arg(i)) == '') then
+ print *, 'Usage: '
+ print *, ' xcombine_data start_slice end_slice filename input_dir output_dir high/low-resolution'
+ print *, ' or '
+ print *, ' xcombine_data slice_list filename input_dir output_dir high/low-resolution'
+ print *
+ print *, ' possible filenames are '
+ print *, ' rho_vp, rho_vs, kappastore, mustore, alpha_kernel, etc'
+ print *
+ print *, ' that are stored in the local directory as real(kind=CUSTOM_REAL) filename(NGLLX,NGLLY,NGLLZ,NSPEC_AB) '
+ print *, ' in filename.bin'
+ print *
+ print *, ' files have been collected in input_dir, output mesh file goes to output_dir '
+ print *, ' give 0 for low resolution and 1 for high resolution'
+ print *
+ stop ' Reenter command line options'
+ endif
+ enddo
+
+! get slice list
+ if (trim(arg(6)) == '') then
+ num_node = 0
+ open(unit = 20, file = trim(arg(1)), status = 'unknown',iostat = ios)
+ if (ios /= 0) then
+ print *,'Error opening ',trim(arg(1))
+ stop
+ endif
+ do while ( 1 == 1)
+ read(20,'(a)',iostat=ios) sline
+ if (ios /= 0) exit
+ read(sline,*,iostat=ios) njunk
+ if (ios /= 0) exit
+ num_node = num_node + 1
+ node_list(num_node) = njunk
+ enddo
+ close(20)
+ filename = arg(2)
+ indir= arg(3)
+ outdir = arg(4)
+ read(arg(5),*) ires
+ else
+ read(arg(1),*) proc1
+ read(arg(2),*) proc2
+ do iproc = proc1, proc2
+ node_list(iproc - proc1 + 1) = iproc
+ enddo
+ num_node = proc2 - proc1 + 1
+ filename = arg(3)
+ indir = arg(4)
+ outdir = arg(5)
+ read(arg(6),*) ires
+ endif
+
+ if (ires == 0) then
+ HIGH_RESOLUTION_MESH = .false.
+ else
+ HIGH_RESOLUTION_MESH = .true.
+ endif
+
+ ! needs local_path for mesh files
+ call read_parameter_file( NPROC,NTSTEP_BETWEEN_OUTPUT_SEISMOS,NSTEP,DT, &
+ UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION, &
+ ATTENUATION,USE_OLSEN_ATTENUATION,LOCAL_PATH,NSOURCES, &
+ OCEANS,TOPOGRAPHY,ANISOTROPY,ABSORBING_CONDITIONS, &
+ MOVIE_SURFACE,MOVIE_VOLUME,CREATE_SHAKEMAP,SAVE_DISPLACEMENT, &
+ NTSTEP_BETWEEN_FRAMES,USE_HIGHRES_FOR_MOVIES,HDUR_MOVIE, &
+ SAVE_MESH_FILES,PRINT_SOURCE_TIME_FUNCTION, &
+ NTSTEP_BETWEEN_OUTPUT_INFO,SIMULATION_TYPE,SAVE_FORWARD, &
+ NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY)
+
+ print *, 'Slice list: '
+ print *, node_list(1:num_node)
+
+ if( USE_VTK_OUTPUT ) then
+ mesh_file = trim(outdir) // '/' // trim(filename)//'.vtk'
+ open(IOVTK,file=mesh_file(1:len_trim(mesh_file)),status='unknown',iostat=ios)
+ if( ios /= 0 ) stop 'error opening vtk output file'
+
+ write(IOVTK,'(a)') '# vtk DataFile Version 3.1'
+ write(IOVTK,'(a)') 'material model VTK file'
+ write(IOVTK,'(a)') 'ASCII'
+ write(IOVTK,'(a)') 'DATASET UNSTRUCTURED_GRID'
+ else
+ ! open paraview output mesh file
+ mesh_file = trim(outdir) // '/' // trim(filename)//'.mesh'
+ call open_file(trim(mesh_file)//char(0))
+ endif
+
+ ! counts total number of points (all slices)
+ npp = 0
+ nee = 0
+ call cvd_count_totals_ext_mesh(num_node,node_list,LOCAL_PATH,&
+ npp,nee,HIGH_RESOLUTION_MESH)
+
+
+ ! writes point and scalar information
+ ! loops over slices (process partitions)
+ np = 0
+ do it = 1, num_node
+
+ iproc = node_list(it)
+
+ print *, ' '
+ print *, 'Reading slice ', iproc
+
+ ! gets number of elements and global points for this partition
+ write(prname_lp,'(a,i6.6,a)') trim(LOCAL_PATH)//'/proc',iproc,'_'
+ open(unit=27,file=prname_lp(1:len_trim(prname_lp))//'external_mesh.bin',&
+ status='old',action='read',form='unformatted',iostat=ios)
+ read(27) NSPEC_AB
+ read(27) NGLOB_AB
+
+ ! ibool file
+ allocate(ibool(NGLLX,NGLLY,NGLLZ,NSPEC_AB),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array ibool'
+ read(27) ibool
+
+ ! global point arrays
+ allocate(xstore(NGLOB_AB),ystore(NGLOB_AB),zstore(NGLOB_AB),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array xstore etc.'
+ read(27) xstore
+ read(27) ystore
+ read(27) zstore
+ close(27)
+
+
+ ! data file
+ write(prname,'(a,i6.6,a)') trim(indir)//'proc',iproc,'_'
+ local_data_file = trim(prname) // trim(filename) // '.bin'
+ open(unit = 28,file = trim(local_data_file),status='old',&
+ action='read',form ='unformatted',iostat=ios)
+ if (ios /= 0) then
+ print *,'Error opening ',trim(local_data_file)
+ stop
+ endif
+
+ allocate(dat(NGLLX,NGLLY,NGLLZ,NSPEC_AB),stat=ier)
+ if( ier /= 0 ) stop 'error allocating dat array'
+
+ ! uses conversion to real values
+ if( CUSTOM_REAL == SIZE_DOUBLE ) then
+ allocate(data(NGLLX,NGLLY,NGLLZ,NSPEC_AB),stat=ier)
+ if( ier /= 0 ) stop 'error allocating data array'
+ read(28) data
+ dat = sngl(data)
+ deallocate(data)
+ else
+ read(28) dat
+ endif
+ close(28)
+ print *, trim(local_data_file)
+
+ ! writes point coordinates and scalar value to mesh file
+ if (.not. HIGH_RESOLUTION_MESH) then
+ ! writes out element corners only
+ call cvd_write_corners(NSPEC_AB,NGLOB_AB,ibool,xstore,ystore,zstore,dat, &
+ it,npp,numpoin,np)
+ else
+ ! high resolution, all GLL points
+ call cvd_write_GLL_points(NSPEC_AB,NGLOB_AB,ibool,xstore,ystore,zstore,dat,&
+ it,npp,numpoin,np)
+ endif
+
+ print*,' points:',np,numpoin
+
+ ! stores total number of points written
+ np = np + numpoin
+
+ ! cleans up memory allocations
+ deallocate(ibool,dat,xstore,ystore,zstore)
+
+ enddo ! all slices for points
+
+ if( USE_VTK_OUTPUT) write(IOVTK,*) ""
+
+ if (np /= npp) stop 'Error: Number of total points are not consistent'
+ print *, 'Total number of points: ', np
+ print *, ' '
+
+
+! writes element information
+ ne = 0
+ np = 0
+ do it = 1, num_node
+
+ iproc = node_list(it)
+
+ print *, 'Reading slice ', iproc
+ write(prname_lp,'(a,i6.6,a)') trim(LOCAL_PATH)//'/proc',iproc,'_'
+
+ ! gets number of elements and global points for this partition
+ open(unit=27,file=prname_lp(1:len_trim(prname_lp))//'external_mesh.bin',&
+ status='old',action='read',form='unformatted')
+ read(27) NSPEC_AB
+ read(27) NGLOB_AB
+
+ ! ibool file
+ allocate(ibool(NGLLX,NGLLY,NGLLZ,NSPEC_AB),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array ibool'
+ read(27) ibool
+ close(27)
+
+ ! writes out element corner indices
+ if (.not. HIGH_RESOLUTION_MESH) then
+ ! spectral elements
+ call cvd_write_corner_elements(NSPEC_AB,NGLOB_AB,ibool, &
+ np,nelement,it,nee,numpoin)
+ else
+ ! subdivided spectral elements
+ call cvd_write_GLL_elements(NSPEC_AB,NGLOB_AB,ibool, &
+ np,nelement,it,nee,numpoin)
+ endif
+
+ print*,' elements:',ne,nelement
+ print*,' points : ',np,numpoin
+
+ ne = ne + nelement
+
+ deallocate(ibool)
+
+ enddo ! num_node
+
+ if( USE_VTK_OUTPUT) write(IOVTK,*) ""
+
+ ! checks with total number of elements
+ if (ne /= nee) then
+ print*,'error: number of elements counted:',ne,'total:',nee
+ stop 'Number of total elements are not consistent'
+ endif
+ print *, 'Total number of elements: ', ne
+
+ if( USE_VTK_OUTPUT) then
+ ! type: hexahedrons
+ write(IOVTK,'(a,i12)') "CELL_TYPES ",nee
+ write(IOVTK,*) (12,it=1,nee)
+ write(IOVTK,*) ""
+
+ write(IOVTK,'(a,i12)') "POINT_DATA ",npp
+ write(IOVTK,'(a)') "SCALARS "//trim(filename)//" float"
+ write(IOVTK,'(a)') "LOOKUP_TABLE default"
+ do it = 1,npp
+ write(IOVTK,*) total_dat(it)
+ enddo
+ write(IOVTK,*) ""
+ close(IOVTK)
+ else
+ ! close mesh file
+ call close_file()
+ endif
+
+ print *, 'Done writing '//trim(mesh_file)
+
+ end program combine_paraview_data_ext_mesh
+
+
+!=============================================================
+
+
+ subroutine cvd_count_totals_ext_mesh(num_node,node_list,LOCAL_PATH,&
+ npp,nee,HIGH_RESOLUTION_MESH)
+
+! counts total number of points and elements for external meshes in given slice list
+! returns: total number of elements (nee) and number of points (npp)
+
+ implicit none
+ include 'constants.h'
+
+ integer,intent(in) :: num_node,node_list(300)
+ character(len=256),intent(in) :: LOCAL_PATH
+ integer,intent(out) :: npp,nee
+ logical,intent(in) :: HIGH_RESOLUTION_MESH
+
+ ! local parameters
+ integer, dimension(:,:,:,:),allocatable :: ibool
+ logical, dimension(:),allocatable :: mask_ibool
+ integer :: NSPEC_AB, NGLOB_AB
+ integer :: it,iproc,npoint,nelement,ios,ispec,ier
+ integer :: iglob1, iglob2, iglob3, iglob4, iglob5, iglob6, iglob7, iglob8
+ character(len=256) :: prname_lp
+
+
+ ! loops over all slices (process partitions)
+ npp = 0
+ nee = 0
+ do it = 1, num_node
+
+ ! gets number of elements and points for this slice
+ iproc = node_list(it)
+ write(prname_lp,'(a,i6.6,a)') trim(LOCAL_PATH)//'/proc',iproc,'_'
+ open(unit=27,file=prname_lp(1:len_trim(prname_lp))//'external_mesh.bin',&
+ status='old',action='read',form='unformatted',iostat=ios)
+ if (ios /= 0) then
+ print *,'Error opening: ',prname_lp(1:len_trim(prname_lp))//'external_mesh.bin'
+ stop
+ endif
+
+ read(27) NSPEC_AB
+ read(27) NGLOB_AB
+ ! gets ibool
+ if( .not. HIGH_RESOLUTION_MESH ) then
+ allocate(ibool(NGLLX,NGLLY,NGLLZ,NSPEC_AB),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array ibool'
+ read(27) ibool
+ endif
+ close(27)
+
+ ! calculates totals
+ if( HIGH_RESOLUTION_MESH ) then
+ ! total number of global points
+ npp = npp + NGLOB_AB
+
+ ! total number of elements
+ ! each spectral elements gets subdivided by GLL points,
+ ! which form (NGLLX-1)*(NGLLY-1)*(NGLLZ-1) sub-elements
+ nelement = NSPEC_AB * (NGLLX-1) * (NGLLY-1) * (NGLLZ-1)
+ nee = nee + nelement
+
+ else
+
+ ! mark element corners (global AVS or DX points)
+ allocate(mask_ibool(NGLOB_AB),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array mask_ibool'
+ mask_ibool = .false.
+ do ispec=1,NSPEC_AB
+ iglob1=ibool(1,1,1,ispec)
+ iglob2=ibool(NGLLX,1,1,ispec)
+ iglob3=ibool(NGLLX,NGLLY,1,ispec)
+ iglob4=ibool(1,NGLLY,1,ispec)
+ iglob5=ibool(1,1,NGLLZ,ispec)
+ iglob6=ibool(NGLLX,1,NGLLZ,ispec)
+ iglob7=ibool(NGLLX,NGLLY,NGLLZ,ispec)
+ iglob8=ibool(1,NGLLY,NGLLZ,ispec)
+ mask_ibool(iglob1) = .true.
+ mask_ibool(iglob2) = .true.
+ mask_ibool(iglob3) = .true.
+ mask_ibool(iglob4) = .true.
+ mask_ibool(iglob5) = .true.
+ mask_ibool(iglob6) = .true.
+ mask_ibool(iglob7) = .true.
+ mask_ibool(iglob8) = .true.
+ enddo
+
+ ! count global number of AVS or DX points
+ npoint = count(mask_ibool(:))
+ npp = npp + npoint
+
+ ! total number of spectral elements
+ nee = nee + NSPEC_AB
+
+ endif ! HIGH_RESOLUTION_MESH
+
+ ! frees arrays
+ if( allocated(mask_ibool) ) deallocate( mask_ibool)
+ if( allocated(ibool) ) deallocate(ibool)
+
+ enddo
+
+ end subroutine cvd_count_totals_ext_mesh
+
+!=============================================================
+
+
+ subroutine cvd_write_corners(NSPEC_AB,NGLOB_AB,ibool,xstore,ystore,zstore,dat,&
+ it,npp,numpoin,np)
+
+! writes out locations of spectral element corners only
+ use vtk
+ implicit none
+ include 'constants.h'
+
+ integer,intent(in) :: NSPEC_AB,NGLOB_AB
+ integer,dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB),intent(in) :: ibool
+ real(kind=CUSTOM_REAL),dimension(NGLOB_AB) :: xstore, ystore, zstore
+ real,dimension(NGLLY,NGLLY,NGLLZ,NSPEC_AB),intent(in) :: dat
+ integer:: it
+ integer :: npp,numpoin,np
+
+ ! local parameters
+ logical,dimension(:),allocatable :: mask_ibool
+ real :: x, y, z
+ integer :: ispec,ier
+ integer :: iglob1, iglob2, iglob3, iglob4, iglob5, iglob6, iglob7, iglob8
+
+ ! writes out total number of points
+ if (it == 1) then
+ if( USE_VTK_OUTPUT ) then
+ write(IOVTK, '(a,i12,a)') 'POINTS ', npp, ' float'
+ ! creates array to hold point data
+ allocate(total_dat(npp),stat=ier)
+ if( ier /= 0 ) stop 'error allocating total dat array'
+ total_dat(:) = 0.0
+ else
+ call write_integer(npp)
+ endif
+ endif
+
+ ! writes our corner point locations
+ allocate(mask_ibool(NGLOB_AB),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array mask_ibool'
+ mask_ibool(:) = .false.
+ numpoin = 0
+ do ispec=1,NSPEC_AB
+ iglob1=ibool(1,1,1,ispec)
+ iglob2=ibool(NGLLX,1,1,ispec)
+ iglob3=ibool(NGLLX,NGLLY,1,ispec)
+ iglob4=ibool(1,NGLLY,1,ispec)
+ iglob5=ibool(1,1,NGLLZ,ispec)
+ iglob6=ibool(NGLLX,1,NGLLZ,ispec)
+ iglob7=ibool(NGLLX,NGLLY,NGLLZ,ispec)
+ iglob8=ibool(1,NGLLY,NGLLZ,ispec)
+
+ if(.not. mask_ibool(iglob1)) then
+ numpoin = numpoin + 1
+ x = xstore(iglob1)
+ y = ystore(iglob1)
+ z = zstore(iglob1)
+ if( USE_VTK_OUTPUT ) then
+ write(IOVTK,'(3e18.6)') x,y,z
+ total_dat(np+numpoin) = dat(1,1,1,ispec)
+ else
+ call write_real(x)
+ call write_real(y)
+ call write_real(z)
+ call write_real(dat(1,1,1,ispec))
+ endif
+ mask_ibool(iglob1) = .true.
+ endif
+ if(.not. mask_ibool(iglob2)) then
+ numpoin = numpoin + 1
+ x = xstore(iglob2)
+ y = ystore(iglob2)
+ z = zstore(iglob2)
+ if( USE_VTK_OUTPUT ) then
+ write(IOVTK,'(3e18.6)') x,y,z
+ total_dat(np+numpoin) = dat(NGLLX,1,1,ispec)
+ else
+ call write_real(x)
+ call write_real(y)
+ call write_real(z)
+ call write_real(dat(NGLLX,1,1,ispec))
+ endif
+ mask_ibool(iglob2) = .true.
+ endif
+ if(.not. mask_ibool(iglob3)) then
+ numpoin = numpoin + 1
+ x = xstore(iglob3)
+ y = ystore(iglob3)
+ z = zstore(iglob3)
+ if( USE_VTK_OUTPUT ) then
+ write(IOVTK,'(3e18.6)') x,y,z
+ total_dat(np+numpoin) = dat(NGLLX,NGLLY,1,ispec)
+ else
+ call write_real(x)
+ call write_real(y)
+ call write_real(z)
+ call write_real(dat(NGLLX,NGLLY,1,ispec))
+ endif
+ mask_ibool(iglob3) = .true.
+ endif
+ if(.not. mask_ibool(iglob4)) then
+ numpoin = numpoin + 1
+ x = xstore(iglob4)
+ y = ystore(iglob4)
+ z = zstore(iglob4)
+ if( USE_VTK_OUTPUT ) then
+ write(IOVTK,'(3e18.6)') x,y,z
+ total_dat(np+numpoin) = dat(1,NGLLY,1,ispec)
+ else
+ call write_real(x)
+ call write_real(y)
+ call write_real(z)
+ call write_real(dat(1,NGLLY,1,ispec))
+ endif
+ mask_ibool(iglob4) = .true.
+ endif
+ if(.not. mask_ibool(iglob5)) then
+ numpoin = numpoin + 1
+ x = xstore(iglob5)
+ y = ystore(iglob5)
+ z = zstore(iglob5)
+ if( USE_VTK_OUTPUT ) then
+ write(IOVTK,'(3e18.6)') x,y,z
+ total_dat(np+numpoin) = dat(1,1,NGLLZ,ispec)
+ else
+ call write_real(x)
+ call write_real(y)
+ call write_real(z)
+ call write_real(dat(1,1,NGLLZ,ispec))
+ endif
+ mask_ibool(iglob5) = .true.
+ endif
+ if(.not. mask_ibool(iglob6)) then
+ numpoin = numpoin + 1
+ x = xstore(iglob6)
+ y = ystore(iglob6)
+ z = zstore(iglob6)
+ if( USE_VTK_OUTPUT ) then
+ write(IOVTK,'(3e18.6)') x,y,z
+ total_dat(np+numpoin) = dat(NGLLX,1,NGLLZ,ispec)
+ else
+ call write_real(x)
+ call write_real(y)
+ call write_real(z)
+ call write_real(dat(NGLLX,1,NGLLZ,ispec))
+ endif
+ mask_ibool(iglob6) = .true.
+ endif
+ if(.not. mask_ibool(iglob7)) then
+ numpoin = numpoin + 1
+ x = xstore(iglob7)
+ y = ystore(iglob7)
+ z = zstore(iglob7)
+ if( USE_VTK_OUTPUT ) then
+ write(IOVTK,'(3e18.6)') x,y,z
+ total_dat(np+numpoin) = dat(NGLLX,NGLLY,NGLLZ,ispec)
+ else
+ call write_real(x)
+ call write_real(y)
+ call write_real(z)
+ call write_real(dat(NGLLX,NGLLY,NGLLZ,ispec))
+ endif
+ mask_ibool(iglob7) = .true.
+ endif
+ if(.not. mask_ibool(iglob8)) then
+ numpoin = numpoin + 1
+ x = xstore(iglob8)
+ y = ystore(iglob8)
+ z = zstore(iglob8)
+ if( USE_VTK_OUTPUT ) then
+ write(IOVTK,'(3e18.6)') x,y,z
+ total_dat(np+numpoin) = dat(1,NGLLY,NGLLZ,ispec)
+ else
+ call write_real(x)
+ call write_real(y)
+ call write_real(z)
+ call write_real(dat(1,NGLLY,NGLLZ,ispec))
+ endif
+ mask_ibool(iglob8) = .true.
+ endif
+ enddo ! ispec
+
+ end subroutine cvd_write_corners
+
+
+!=============================================================
+
+
+ subroutine cvd_write_GLL_points(NSPEC_AB,NGLOB_AB,ibool,xstore,ystore,zstore,dat,&
+ it,npp,numpoin,np)
+
+! writes out locations of all GLL points of spectral elements
+ use vtk
+ implicit none
+ include 'constants.h'
+
+ integer,intent(in) :: NSPEC_AB,NGLOB_AB
+ integer,dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB),intent(in) :: ibool
+ real(kind=CUSTOM_REAL),dimension(NGLOB_AB) :: xstore, ystore, zstore
+ real,dimension(NGLLY,NGLLY,NGLLZ,NSPEC_AB),intent(in) :: dat
+ integer:: it,npp,numpoin,np
+
+ ! local parameters
+ logical,dimension(:),allocatable :: mask_ibool
+ real :: x, y, z
+ integer :: ispec,i,j,k,iglob,ier
+
+ ! writes out total number of points
+ if (it == 1) then
+ if( USE_VTK_OUTPUT ) then
+ write(IOVTK, '(a,i12,a)') 'POINTS ', npp, ' float'
+ ! creates array to hold point data
+ allocate(total_dat(npp),stat=ier)
+ if( ier /= 0 ) stop 'error allocating total dat array'
+ total_dat(:) = 0.0
+ else
+ call write_integer(npp)
+ endif
+ endif
+
+ ! writes out point locations and values
+ allocate(mask_ibool(NGLOB_AB),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array mask_ibool'
+
+ mask_ibool(:) = .false.
+ numpoin = 0
+ do ispec=1,NSPEC_AB
+ do k = 1, NGLLZ
+ do j = 1, NGLLY
+ do i = 1, NGLLX
+ iglob = ibool(i,j,k,ispec)
+ if(.not. mask_ibool(iglob)) then
+ numpoin = numpoin + 1
+ x = xstore(iglob)
+ y = ystore(iglob)
+ z = zstore(iglob)
+ if( USE_VTK_OUTPUT ) then
+ write(IOVTK,'(3e18.6)') x,y,z
+ total_dat(np+numpoin) = dat(i,j,k,ispec)
+ else
+ call write_real(x)
+ call write_real(y)
+ call write_real(z)
+ call write_real(dat(i,j,k,ispec))
+ endif
+ mask_ibool(iglob) = .true.
+ endif
+ enddo ! i
+ enddo ! j
+ enddo ! k
+ enddo !ispec
+
+ end subroutine cvd_write_GLL_points
+
+!=============================================================
+
+! writes out locations of spectral element corners only
+
+ subroutine cvd_write_corner_elements(NSPEC_AB,NGLOB_AB,ibool,&
+ np,nelement,it,nee,numpoin)
+
+ use vtk
+ implicit none
+ include 'constants.h'
+
+ integer,intent(in) :: NSPEC_AB,NGLOB_AB
+ integer,dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB),intent(in) :: ibool
+ integer:: it,nee,np,nelement,numpoin
+
+ ! local parameters
+ logical,dimension(:),allocatable :: mask_ibool
+ integer,dimension(:),allocatable :: num_ibool
+ integer :: ispec,ier
+ integer :: iglob1, iglob2, iglob3, iglob4, iglob5, iglob6, iglob7, iglob8
+ integer :: n1, n2, n3, n4, n5, n6, n7, n8
+
+ ! outputs total number of elements for all slices
+ if (it == 1) then
+ if( USE_VTK_OUTPUT ) then
+ ! note: indices for vtk start at 0
+ write(IOVTK,'(a,i12,i12)') "CELLS ",nee,nee*9
+ else
+ call write_integer(nee)
+ endif
+ end if
+
+ ! writes out element indices
+ allocate(mask_ibool(NGLOB_AB), &
+ num_ibool(NGLOB_AB),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array mask_ibool'
+
+ mask_ibool(:) = .false.
+ num_ibool(:) = 0
+ numpoin = 0
+ do ispec=1,NSPEC_AB
+ ! gets corner indices
+ iglob1=ibool(1,1,1,ispec)
+ iglob2=ibool(NGLLX,1,1,ispec)
+ iglob3=ibool(NGLLX,NGLLY,1,ispec)
+ iglob4=ibool(1,NGLLY,1,ispec)
+ iglob5=ibool(1,1,NGLLZ,ispec)
+ iglob6=ibool(NGLLX,1,NGLLZ,ispec)
+ iglob7=ibool(NGLLX,NGLLY,NGLLZ,ispec)
+ iglob8=ibool(1,NGLLY,NGLLZ,ispec)
+
+ ! sets increasing numbering
+ if(.not. mask_ibool(iglob1)) then
+ numpoin = numpoin + 1
+ num_ibool(iglob1) = numpoin
+ mask_ibool(iglob1) = .true.
+ endif
+ if(.not. mask_ibool(iglob2)) then
+ numpoin = numpoin + 1
+ num_ibool(iglob2) = numpoin
+ mask_ibool(iglob2) = .true.
+ endif
+ if(.not. mask_ibool(iglob3)) then
+ numpoin = numpoin + 1
+ num_ibool(iglob3) = numpoin
+ mask_ibool(iglob3) = .true.
+ endif
+ if(.not. mask_ibool(iglob4)) then
+ numpoin = numpoin + 1
+ num_ibool(iglob4) = numpoin
+ mask_ibool(iglob4) = .true.
+ endif
+ if(.not. mask_ibool(iglob5)) then
+ numpoin = numpoin + 1
+ num_ibool(iglob5) = numpoin
+ mask_ibool(iglob5) = .true.
+ endif
+ if(.not. mask_ibool(iglob6)) then
+ numpoin = numpoin + 1
+ num_ibool(iglob6) = numpoin
+ mask_ibool(iglob6) = .true.
+ endif
+ if(.not. mask_ibool(iglob7)) then
+ numpoin = numpoin + 1
+ num_ibool(iglob7) = numpoin
+ mask_ibool(iglob7) = .true.
+ endif
+ if(.not. mask_ibool(iglob8)) then
+ numpoin = numpoin + 1
+ num_ibool(iglob8) = numpoin
+ mask_ibool(iglob8) = .true.
+ endif
+
+ ! outputs corner indices (starting with 0 )
+ n1 = num_ibool(iglob1) -1 + np
+ n2 = num_ibool(iglob2) -1 + np
+ n3 = num_ibool(iglob3) -1 + np
+ n4 = num_ibool(iglob4) -1 + np
+ n5 = num_ibool(iglob5) -1 + np
+ n6 = num_ibool(iglob6) -1 + np
+ n7 = num_ibool(iglob7) -1 + np
+ n8 = num_ibool(iglob8) -1 + np
+
+ if( USE_VTK_OUTPUT ) then
+ write(IOVTK,'(9i12)') 8,n1,n2,n3,n4,n5,n6,n7,n8
+ else
+ call write_integer(n1)
+ call write_integer(n2)
+ call write_integer(n3)
+ call write_integer(n4)
+ call write_integer(n5)
+ call write_integer(n6)
+ call write_integer(n7)
+ call write_integer(n8)
+ endif
+
+ enddo
+
+ ! elements written
+ nelement = NSPEC_AB
+
+ ! updates points written
+ np = np + numpoin
+
+ end subroutine cvd_write_corner_elements
+
+
+!=============================================================
+
+
+ subroutine cvd_write_GLL_elements(NSPEC_AB,NGLOB_AB,ibool, &
+ np,nelement,it,nee,numpoin)
+
+! writes out indices of elements given by GLL points
+ use vtk
+ implicit none
+ include 'constants.h'
+
+ integer,intent(in):: NSPEC_AB,NGLOB_AB
+ integer,dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB),intent(in) :: ibool
+ integer:: it,nee,np,numpoin,nelement
+
+ ! local parameters
+ logical,dimension(:),allocatable :: mask_ibool
+ integer,dimension(:),allocatable :: num_ibool
+ integer :: ispec,i,j,k,ier
+ integer :: iglob,iglob1, iglob2, iglob3, iglob4, iglob5, iglob6, iglob7, iglob8
+ integer :: n1, n2, n3, n4, n5, n6, n7, n8
+
+ ! outputs total number of elements for all slices
+ if (it == 1) then
+ if( USE_VTK_OUTPUT ) then
+ ! note: indices for vtk start at 0
+ write(IOVTK,'(a,i12,i12)') "CELLS ",nee,nee*9
+ else
+ !nee = nelement * num_node
+ call write_integer(nee)
+ endif
+ endif
+
+ ! sets numbering num_ibool respecting mask
+ allocate(mask_ibool(NGLOB_AB), &
+ num_ibool(NGLOB_AB),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array mask_ibool'
+
+ mask_ibool(:) = .false.
+ num_ibool(:) = 0
+ numpoin = 0
+ do ispec=1,NSPEC_AB
+ do k = 1, NGLLZ
+ do j = 1, NGLLY
+ do i = 1, NGLLX
+ iglob = ibool(i,j,k,ispec)
+ if(.not. mask_ibool(iglob)) then
+ numpoin = numpoin + 1
+ num_ibool(iglob) = numpoin
+ mask_ibool(iglob) = .true.
+ endif
+ enddo ! i
+ enddo ! j
+ enddo ! k
+ enddo !ispec
+
+ ! outputs GLL subelement
+ do ispec = 1, NSPEC_AB
+ do k = 1, NGLLZ-1
+ do j = 1, NGLLY-1
+ do i = 1, NGLLX-1
+ iglob1 = ibool(i,j,k,ispec)
+ iglob2 = ibool(i+1,j,k,ispec)
+ iglob3 = ibool(i+1,j+1,k,ispec)
+ iglob4 = ibool(i,j+1,k,ispec)
+ iglob5 = ibool(i,j,k+1,ispec)
+ iglob6 = ibool(i+1,j,k+1,ispec)
+ iglob7 = ibool(i+1,j+1,k+1,ispec)
+ iglob8 = ibool(i,j+1,k+1,ispec)
+ n1 = num_ibool(iglob1)+np-1
+ n2 = num_ibool(iglob2)+np-1
+ n3 = num_ibool(iglob3)+np-1
+ n4 = num_ibool(iglob4)+np-1
+ n5 = num_ibool(iglob5)+np-1
+ n6 = num_ibool(iglob6)+np-1
+ n7 = num_ibool(iglob7)+np-1
+ n8 = num_ibool(iglob8)+np-1
+
+ if( USE_VTK_OUTPUT ) then
+ write(IOVTK,'(9i12)') 8,n1,n2,n3,n4,n5,n6,n7,n8
+ else
+ call write_integer(n1)
+ call write_integer(n2)
+ call write_integer(n3)
+ call write_integer(n4)
+ call write_integer(n5)
+ call write_integer(n6)
+ call write_integer(n7)
+ call write_integer(n8)
+ endif
+ enddo
+ enddo
+ enddo
+ enddo
+ ! elements written
+ nelement = NSPEC_AB * (NGLLX-1) * (NGLLY-1) * (NGLLZ-1)
+
+ ! updates points written
+ np = np + numpoin
+
+ end subroutine cvd_write_GLL_elements
+
Deleted: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/compute_boundary_kernel.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/compute_boundary_kernel.f90 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/compute_boundary_kernel.f90 2011-10-06 03:31:24 UTC (rev 19027)
@@ -1,233 +0,0 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
- subroutine compute_boundary_kernel()
-
-! isotropic topography kernel computation
-! compare with Tromp et al. (2005), eq. (25), or see Liu & Tromp (2008), eq. (65)
-
- use specfem_par
- use specfem_par_elastic
- use specfem_par_acoustic
-
- implicit none
- ! local parameters
- real(kind=CUSTOM_REAL):: kernel_moho_top,kernel_moho_bot
- integer :: i,j,k
- integer :: ispec2D,igll,jgll
- integer :: ispec_top,ispec_bot,iglob_top,iglob_bot
- logical :: is_done
-
- ! loops over top/bottom elements of moho surface
- do ispec2D = 1, NSPEC2D_MOHO
- ispec_top = ibelm_moho_top(ispec2D)
- ispec_bot = ibelm_moho_bot(ispec2D)
-
- ! elements on both sides available
- if( ispec_top > 0 .and. ispec_bot > 0 ) then
- ! loops over surface
- do igll=1,NGLLSQUARE
- i = ijk_moho_top(1,igll,ispec2D)
- j = ijk_moho_top(2,igll,ispec2D)
- k = ijk_moho_top(3,igll,ispec2D)
- iglob_top = ibool(i,j,k,ispec_top)
-
- ! computes contribution from top element
- call compute_boundary_kernel_elem( kernel_moho_top, &
- mustore(i,j,k,ispec_top), &
- kappastore(i,j,k,ispec_top),rho_vs(i,j,k,ispec_top), &
- accel(:,iglob_top),b_displ(:,iglob_top), &
- dsdx_top(:,:,i,j,k,ispec2D),b_dsdx_top(:,:,i,j,k,ispec2D), &
- normal_moho_top(:,igll,ispec2D) )
-
- ! finds corresponding global node in bottom element
- is_done = .false.
- do jgll = 1,NGLLSQUARE
- i = ijk_moho_bot(1,jgll,ispec2D)
- j = ijk_moho_bot(2,jgll,ispec2D)
- k = ijk_moho_bot(3,jgll,ispec2D)
- iglob_bot = ibool(i,j,k,ispec_bot)
-
- if( iglob_bot /= iglob_top ) cycle
- ! iglob_top == iglob_bot!
-
- ! computes contribution from bottom element
- call compute_boundary_kernel_elem( kernel_moho_bot, &
- mustore(i,j,k,ispec_bot), &
- kappastore(i,j,k,ispec_bot),rho_vs(i,j,k,ispec_bot), &
- accel(:,iglob_bot),b_displ(:,iglob_bot), &
- dsdx_bot(:,:,i,j,k,ispec2D),b_dsdx_bot(:,:,i,j,k,ispec2D), &
- normal_moho_bot(:,jgll,ispec2D) )
-
- ! note: kernel point position: indices given by ijk_moho_top(:,igll,ispec2D)
- moho_kl(igll,ispec2D) = moho_kl(igll,ispec2D) &
- + (kernel_moho_top - kernel_moho_bot) * deltat
-
- ! kernel done for this point
- is_done = .true.
- enddo
-
- ! checks
- if( .not. is_done ) then
- print*,'error : moho kernel not computed'
- print*,'ispec:',ispec_top,ispec_bot,iglob_top,i,j,k
- call exit_mpi(myrank,'error moho kernel computation')
- endif
-
- enddo
-
- ! only one element available
- ! e.g. free-surface: see Tromp et al. (2005), eq. (28)
- else if( ispec_bot > 0 .or. ispec_top > 0 ) then
-
- ! loops over surface
- do igll=1,NGLLSQUARE
-
- if( ispec_top > 0 ) then
- i = ijk_moho_top(1,igll,ispec2D)
- j = ijk_moho_top(2,igll,ispec2D)
- k = ijk_moho_top(3,igll,ispec2D)
- iglob_top = ibool(i,j,k,ispec_top)
-
- ! computes contribution from top element
- call compute_boundary_kernel_elem( kernel_moho_top, &
- mustore(i,j,k,ispec_top), &
- kappastore(i,j,k,ispec_top),rho_vs(i,j,k,ispec_top), &
- accel(:,iglob_top),b_displ(:,iglob_top), &
- dsdx_top(:,:,i,j,k,ispec2D),b_dsdx_top(:,:,i,j,k,ispec2D), &
- normal_moho_top(:,igll,ispec2D) )
-
- ! note: kernel point position igll: indices given by ijk_moho_top(:,igll,ispec2D)
- moho_kl(igll,ispec2D) = moho_kl(igll,ispec2D) + kernel_moho_top * deltat
-
- else
- i = ijk_moho_bot(1,igll,ispec2D)
- j = ijk_moho_bot(2,igll,ispec2D)
- k = ijk_moho_bot(3,igll,ispec2D)
- iglob_bot = ibool(i,j,k,ispec_bot)
-
- ! computes contribution from bottom element
- call compute_boundary_kernel_elem( kernel_moho_bot, &
- mustore(i,j,k,ispec_bot), &
- kappastore(i,j,k,ispec_bot),rho_vs(i,j,k,ispec_bot), &
- accel(:,iglob_bot),b_displ(:,iglob_bot), &
- dsdx_bot(:,:,i,j,k,ispec2D),b_dsdx_bot(:,:,i,j,k,ispec2D), &
- normal_moho_bot(:,igll,ispec2D) )
-
- ! note: kernel point position igll: indices given by ijk_moho_bot(:,igll,ispec2D)
- moho_kl(igll,ispec2D) = moho_kl(igll,ispec2D) - kernel_moho_bot * deltat
-
- endif
- enddo
- endif
- enddo ! ispec2D
-
-
-end subroutine compute_boundary_kernel
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-subroutine compute_boundary_kernel_elem(kernel, mul, kappal, rho_vsl, &
- accel, b_displ, ds, b_ds, norm)
-
-! compute the boundary kernel contribution from one side of the boundary
-! see e.g.: Tromp et al. (2005), eq. (25), or Liu & Tromp (2008), eq. (65)
-
- implicit none
- include 'constants.h'
-
- real(kind=CUSTOM_REAL) kernel, mul, kappal, rho_vsl
- real(kind=CUSTOM_REAL) :: accel(NDIM), b_displ(NDIM), ds(NDIM,NDIM), b_ds(NDIM,NDIM), norm(NDIM)
-
- real(kind=CUSTOM_REAL) :: eps3, eps(NDIM,NDIM), epsdev(NDIM,NDIM), normal(NDIM,1)
- real(kind=CUSTOM_REAL) :: b_eps3, b_eps(NDIM,NDIM), b_epsdev(NDIM,NDIM)
- real(kind=CUSTOM_REAL) :: temp1(NDIM,NDIM), rhol, kl(1,1), one_matrix(1,1)
-
-
- normal(:,1) = norm
- one_matrix(1,1) = ONE
-
- ! adjoint strain (epsilon) trace
- eps3 = ds(1,1) + ds(2,2) + ds(3,3)
-
- ! adjoint strain tensor
- eps(1,1) = ds(1,1)
- eps(2,2) = ds(2,2)
- eps(3,3) = ds(3,3)
- eps(1,2) = (ds(1,2) + ds(2,1))/2
- eps(1,3) = (ds(1,3) + ds(3,1))/2
- eps(2,3) = (ds(2,3) + ds(3,2))/2
- eps(2,1) = eps(1,2)
- eps(3,1) = eps(1,3)
- eps(3,2) = eps(2,3)
-
- ! adjoint deviatoric strain component
- epsdev = eps
- epsdev(1,1) = eps(1,1) - eps3 / 3
- epsdev(2,2) = eps(2,2) - eps3 / 3
- epsdev(3,3) = eps(3,3) - eps3 / 3
-
-
- ! backward/reconstructed-forward strain (epsilon) trace
- b_eps3 = b_ds(1,1) + b_ds(2,2) + b_ds(3,3)
-
- ! backward/reconstructed-forward strain tensor
- b_eps(1,1) = b_ds(1,1)
- b_eps(2,2) = b_ds(2,2)
- b_eps(3,3) = b_ds(3,3)
- b_eps(1,2) = (b_ds(1,2) + b_ds(2,1))/2
- b_eps(1,3) = (b_ds(1,3) + b_ds(3,1))/2
- b_eps(2,3) = (b_ds(2,3) + b_ds(3,2))/2
- b_eps(2,1) = b_eps(1,2)
- b_eps(3,1) = b_eps(1,3)
- b_eps(3,2) = b_eps(2,3)
-
- ! backward/reconstructed-forward deviatoric strain
- b_epsdev = b_eps
- b_epsdev(1,1) = b_eps(1,1) - b_eps3 / 3
- b_epsdev(2,2) = b_eps(2,2) - b_eps3 / 3
- b_epsdev(3,3) = b_eps(3,3) - b_eps3 / 3
-
- ! matrix multiplication
- temp1 = matmul(epsdev,b_epsdev)
-
- ! density value
- rhol = rho_vsl ** 2 / mul
-
- ! isotropic kernel value
- ! see e.g.: Tromp et al. (2005), eq. (25), or Liu & Tromp 2008, eq. (65)
- kl = ( rhol * dot_product(accel(:), b_displ(:)) + kappal * eps3 * b_eps3 &
- + 2 * mul * (temp1(1,1) + temp1(2,2) + temp1(3,3)) ) * one_matrix &
- - kappal * matmul(transpose(normal),matmul(eps,normal)) * b_eps3 &
- - kappal * matmul(transpose(normal),matmul(b_eps,normal)) * eps3 &
- - 2 * mul * matmul(transpose(normal), matmul(matmul(b_epsdev,ds), normal)) &
- - 2 * mul * matmul(transpose(normal), matmul(matmul(epsdev,b_ds), normal))
-
- kernel = kl(1,1)
-
-end subroutine compute_boundary_kernel_elem
Deleted: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/compute_kernels.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/compute_kernels.f90 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/compute_kernels.f90 2011-10-06 03:31:24 UTC (rev 19027)
@@ -1,236 +0,0 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
- subroutine compute_kernels()
-
-! kernel calculations
-! see e.g. Tromp et al. (2005)
-
- use specfem_par
- use specfem_par_elastic
- use specfem_par_acoustic
-
- implicit none
- ! local parameters
- real(kind=CUSTOM_REAL),dimension(NDIM,NGLLX,NGLLY,NGLLZ):: b_displ_elm,accel_elm
- real(kind=CUSTOM_REAL) :: kappal,rhol
- integer :: i,j,k,ispec,iglob
- real(kind=CUSTOM_REAL), dimension(5) :: epsilondev_loc,b_epsilondev_loc
-
- if(.NOT. GPU_MODE) then
-
- ! updates kernels
- do ispec = 1, NSPEC_AB
-
- ! elastic domains
- if( ispec_is_elastic(ispec) ) then
-
- do k = 1, NGLLZ
- do j = 1, NGLLY
- do i = 1, NGLLX
- iglob = ibool(i,j,k,ispec)
-
- ! isotropic kernels
- ! note: takes displacement from backward/reconstructed (forward) field b_displ
- ! and acceleration from adjoint field accel (containing adjoint sources)
- !
- ! note: : time integral summation uses deltat
- !
- ! compare with Tromp et al. (2005), eq. (14), which takes adjoint displacement
- ! and forward acceleration, that is the symmetric form of what is calculated here
- ! however, this kernel expression is symmetric with regards
- ! to interchange adjoint - forward field
- rho_kl(i,j,k,ispec) = rho_kl(i,j,k,ispec) &
- + deltat * dot_product(accel(:,iglob), b_displ(:,iglob))
-
- ! kernel for shear modulus, see e.g. Tromp et al. (2005), equation (17)
- ! note: multiplication with 2*mu(x) will be done after the time loop
- epsilondev_loc(1) = epsilondev_xx(i,j,k,ispec)
- epsilondev_loc(2) = epsilondev_yy(i,j,k,ispec)
- epsilondev_loc(3) = epsilondev_xy(i,j,k,ispec)
- epsilondev_loc(4) = epsilondev_xz(i,j,k,ispec)
- epsilondev_loc(5) = epsilondev_yz(i,j,k,ispec)
-
- b_epsilondev_loc(1) = b_epsilondev_xx(i,j,k,ispec)
- b_epsilondev_loc(2) = b_epsilondev_yy(i,j,k,ispec)
- b_epsilondev_loc(3) = b_epsilondev_xy(i,j,k,ispec)
- b_epsilondev_loc(4) = b_epsilondev_xz(i,j,k,ispec)
- b_epsilondev_loc(5) = b_epsilondev_yz(i,j,k,ispec)
-
- mu_kl(i,j,k,ispec) = mu_kl(i,j,k,ispec) &
- + deltat * (epsilondev_loc(1)*b_epsilondev_loc(1) + epsilondev_loc(2)*b_epsilondev_loc(2) &
- + (epsilondev_loc(1)+epsilondev_loc(2)) * (b_epsilondev_loc(1)+b_epsilondev_loc(2)) &
- + 2 * (epsilondev_loc(3)*b_epsilondev_loc(3) + epsilondev_loc(4)*b_epsilondev_loc(4) + &
- epsilondev_loc(5)*b_epsilondev_loc(5)) )
-
- ! kernel for bulk modulus, see e.g. Tromp et al. (2005), equation (18)
- ! note: multiplication with kappa(x) will be done after the time loop
- kappa_kl(i,j,k,ispec) = kappa_kl(i,j,k,ispec) &
- + deltat * (9 * epsilon_trace_over_3(i,j,k,ispec) &
- * b_epsilon_trace_over_3(i,j,k,ispec))
-
- enddo
- enddo
- enddo
- endif !ispec_is_elastic
-
- ! acoustic domains
- if( ispec_is_acoustic(ispec) ) then
-
- ! backward fields: displacement vector
- call compute_gradient(ispec,NSPEC_ADJOINT,NGLOB_ADJOINT, &
- b_potential_acoustic, b_displ_elm,&
- hprime_xx,hprime_yy,hprime_zz, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- ibool,rhostore)
- ! adjoint fields: acceleration vector
- call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
- potential_dot_dot_acoustic, accel_elm,&
- hprime_xx,hprime_yy,hprime_zz, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- ibool,rhostore)
-
- do k = 1, NGLLZ
- do j = 1, NGLLY
- do i = 1, NGLLX
- iglob = ibool(i,j,k,ispec)
-
- ! density kernel
- rhol = rhostore(i,j,k,ispec)
- rho_ac_kl(i,j,k,ispec) = rho_ac_kl(i,j,k,ispec) &
- - deltat * rhol * dot_product(accel_elm(:,i,j,k), b_displ_elm(:,i,j,k))
-
- ! bulk modulus kernel
- kappal = kappastore(i,j,k,ispec)
- kappa_ac_kl(i,j,k,ispec) = kappa_ac_kl(i,j,k,ispec) &
- - deltat / kappal &
- * potential_dot_dot_acoustic(iglob) &
- * b_potential_dot_dot_acoustic(iglob)
-
- enddo
- enddo
- enddo
- endif ! ispec_is_acoustic
-
- enddo
- else ! GPU_MODE==1
-
- call compute_kernels_cuda(Mesh_pointer,NOISE_TOMOGRAPHY,ELASTIC_SIMULATION,&
- SAVE_MOHO_MESH,deltat)
- endif ! GPU_MODE
- ! moho kernel
- if( ELASTIC_SIMULATION .and. SAVE_MOHO_MESH ) then
- call compute_boundary_kernel()
- endif
-
- ! for noise simulations --- source strength kernel
- if (NOISE_TOMOGRAPHY == 3) &
- call compute_kernels_strength_noise(NGLLSQUARE*num_free_surface_faces,ibool, &
- sigma_kl,displ,deltat,it, &
- normal_x_noise,normal_y_noise,normal_z_noise, &
- noise_surface_movie, &
- NSPEC_AB,NGLOB_AB, &
- num_free_surface_faces,free_surface_ispec,free_surface_ijk,&
- GPU_MODE,Mesh_pointer)
-
- ! computes an approximative hessian for preconditioning kernels
- if ( APPROXIMATE_HESS_KL ) then
- call compute_kernels_hessian()
- endif
-
- end subroutine compute_kernels
-
-
-!-----------------------------------------------------------------------------
-
- subroutine compute_kernels_hessian()
-
- use specfem_par
- use specfem_par_elastic
- use specfem_par_acoustic
-
- implicit none
- ! local parameters
- real(kind=CUSTOM_REAL),dimension(NDIM,NGLLX,NGLLY,NGLLZ):: b_accel_elm,accel_elm
- integer :: i,j,k,ispec,iglob
-
- ! loops over all elements
- do ispec = 1, NSPEC_AB
-
- ! acoustic domains
- if( ispec_is_acoustic(ispec) ) then
-
- ! adjoint fields: acceleration vector
- call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
- potential_dot_dot_acoustic, accel_elm,&
- hprime_xx,hprime_yy,hprime_zz, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- ibool,rhostore)
-
- ! adjoint fields: acceleration vector
- call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
- b_potential_dot_dot_acoustic, b_accel_elm,&
- hprime_xx,hprime_yy,hprime_zz, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- ibool,rhostore)
-
- do k = 1, NGLLZ
- do j = 1, NGLLY
- do i = 1, NGLLX
- iglob = ibool(i,j,k,ispec)
-
- ! approximates hessian
- ! term with adjoint acceleration and backward/reconstructed acceleration
- hess_ac_kl(i,j,k,ispec) = hess_ac_kl(i,j,k,ispec) &
- + deltat * dot_product(accel_elm(:,i,j,k), b_accel_elm(:,i,j,k))
-
- enddo
- enddo
- enddo
- endif
-
- ! elastic domains
- if( ispec_is_elastic(ispec) ) then
- do k = 1, NGLLZ
- do j = 1, NGLLY
- do i = 1, NGLLX
- iglob = ibool(i,j,k,ispec)
-
- ! approximates hessian
- ! term with adjoint acceleration and backward/reconstructed acceleration
- hess_kl(i,j,k,ispec) = hess_kl(i,j,k,ispec) &
- + deltat * dot_product(accel(:,iglob), b_accel(:,iglob))
-
- enddo
- enddo
- enddo
- endif
-
- enddo
-
- end subroutine compute_kernels_hessian
-
-
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/read_parameter_file.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/read_parameter_file.f90 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/read_parameter_file.f90 2011-10-06 03:31:24 UTC (rev 19027)
@@ -1,233 +1,238 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
- subroutine read_parameter_file( NPROC,NTSTEP_BETWEEN_OUTPUT_SEISMOS,NSTEP,DT, &
- UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION, &
- ATTENUATION,USE_OLSEN_ATTENUATION,LOCAL_PATH,NSOURCES, &
- OCEANS,TOPOGRAPHY,ANISOTROPY,ABSORBING_CONDITIONS, &
- MOVIE_SURFACE,MOVIE_VOLUME,CREATE_SHAKEMAP,SAVE_DISPLACEMENT, &
- NTSTEP_BETWEEN_FRAMES,USE_HIGHRES_FOR_MOVIES,HDUR_MOVIE, &
- SAVE_MESH_FILES,PRINT_SOURCE_TIME_FUNCTION,NTSTEP_BETWEEN_OUTPUT_INFO, &
- SIMULATION_TYPE,SAVE_FORWARD, &
- NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY )
-
- implicit none
-
- include "constants.h"
-
- integer NPROC,NTSTEP_BETWEEN_OUTPUT_SEISMOS,NSTEP,SIMULATION_TYPE, NTSTEP_BETWEEN_READ_ADJSRC
- integer NSOURCES,NTSTEP_BETWEEN_FRAMES,NTSTEP_BETWEEN_OUTPUT_INFO,UTM_PROJECTION_ZONE
- integer NOISE_TOMOGRAPHY
-
- double precision DT,HDUR_MOVIE
-
- logical ATTENUATION,USE_OLSEN_ATTENUATION,OCEANS,TOPOGRAPHY,ABSORBING_CONDITIONS,SAVE_FORWARD
- logical MOVIE_SURFACE,MOVIE_VOLUME,CREATE_SHAKEMAP,SAVE_DISPLACEMENT,USE_HIGHRES_FOR_MOVIES
- logical ANISOTROPY,SAVE_MESH_FILES,PRINT_SOURCE_TIME_FUNCTION,SUPPRESS_UTM_PROJECTION
-
- character(len=256) LOCAL_PATH,CMTSOLUTION
-
-! local variables
- integer ::ios,icounter,isource,idummy,nproc_eta_old,nproc_xi_old
- double precision :: hdur,minval_hdur
- character(len=256) :: dummystring
- integer, external :: err_occurred
-
- ! opens file Par_file
- call open_parameter_file()
-
- ! reads in parameters
- call read_value_integer(SIMULATION_TYPE, 'solver.SIMULATION_TYPE')
- if(err_occurred() /= 0) return
- call read_value_integer(NOISE_TOMOGRAPHY, 'solver.NOISE_TOMOGRAPHY')
- if(err_occurred() /= 0) return
- call read_value_logical(SAVE_FORWARD, 'solver.SAVE_FORWARD')
- if(err_occurred() /= 0) return
- call read_value_integer(UTM_PROJECTION_ZONE, 'mesher.UTM_PROJECTION_ZONE')
- if(err_occurred() /= 0) return
- call read_value_logical(SUPPRESS_UTM_PROJECTION, 'mesher.SUPPRESS_UTM_PROJECTION')
- if(err_occurred() /= 0) return
- ! total number of processors
- call read_value_integer(NPROC, 'mesher.NPROC')
- if(err_occurred() /= 0) then
- ! checks if it's using an old Par_file format
- call read_value_integer(nproc_eta_old, 'mesher.NPROC_ETA')
- if( err_occurred() /= 0 ) then
- print*,'please specify the number of processes in Par_file as:'
- print*,'NPROC = <my_number_of_desired_processes> '
- return
- endif
- ! checks if it's using an old Par_file format
- call read_value_integer(nproc_xi_old, 'mesher.NPROC_XI')
- if( err_occurred() /= 0 ) then
- print*,'please specify the number of processes in Par_file as:'
- print*,'NPROC = <my_number_of_desired_processes> '
- return
- endif
- NPROC = nproc_eta_old * nproc_xi_old
- endif
- call read_value_integer(NSTEP, 'solver.NSTEP')
- if(err_occurred() /= 0) return
- call read_value_double_precision(DT, 'solver.DT')
- if(err_occurred() /= 0) return
- call read_value_logical(OCEANS, 'model.OCEANS')
- if(err_occurred() /= 0) return
- call read_value_logical(TOPOGRAPHY, 'model.TOPOGRAPHY')
- if(err_occurred() /= 0) return
- call read_value_logical(ATTENUATION, 'model.ATTENUATION')
- if(err_occurred() /= 0) return
- call read_value_logical(USE_OLSEN_ATTENUATION, 'model.USE_OLSEN_ATTENUATION')
- if(err_occurred() /= 0) return
- call read_value_logical(ANISOTROPY, 'model.ANISOTROPY')
- if(err_occurred() /= 0) return
- call read_value_logical(ABSORBING_CONDITIONS, 'solver.ABSORBING_CONDITIONS')
- if(err_occurred() /= 0) return
- call read_value_logical(MOVIE_SURFACE, 'solver.MOVIE_SURFACE')
- if(err_occurred() /= 0) return
- call read_value_logical(MOVIE_VOLUME, 'solver.MOVIE_VOLUME')
- if(err_occurred() /= 0) return
- call read_value_integer(NTSTEP_BETWEEN_FRAMES, 'solver.NTSTEP_BETWEEN_FRAMES')
- if(err_occurred() /= 0) return
- call read_value_logical(CREATE_SHAKEMAP, 'solver.CREATE_SHAKEMAP')
- if(err_occurred() /= 0) return
- call read_value_logical(SAVE_DISPLACEMENT, 'solver.SAVE_DISPLACEMENT')
- if(err_occurred() /= 0) return
- call read_value_logical(USE_HIGHRES_FOR_MOVIES, 'solver.USE_HIGHRES_FOR_MOVIES')
- if(err_occurred() /= 0) return
- call read_value_double_precision(HDUR_MOVIE, 'solver.HDUR_MOVIE')
- if(err_occurred() /= 0) return
- call read_value_logical(SAVE_MESH_FILES, 'mesher.SAVE_MESH_FILES')
- if(err_occurred() /= 0) return
- call read_value_string(LOCAL_PATH, 'LOCAL_PATH')
- if(err_occurred() /= 0) return
- call read_value_integer(NTSTEP_BETWEEN_OUTPUT_INFO, 'solver.NTSTEP_BETWEEN_OUTPUT_INFO')
- if(err_occurred() /= 0) return
- call read_value_integer(NTSTEP_BETWEEN_OUTPUT_SEISMOS, 'solver.NTSTEP_BETWEEN_OUTPUT_SEISMOS')
- if(err_occurred() /= 0) return
- call read_value_integer(NTSTEP_BETWEEN_READ_ADJSRC, 'solver.NTSTEP_BETWEEN_READ_ADJSRC')
- if(err_occurred() /= 0) return
- call read_value_logical(PRINT_SOURCE_TIME_FUNCTION, 'solver.PRINT_SOURCE_TIME_FUNCTION')
- if(err_occurred() /= 0) return
-
- ! noise simulations:
- ! double the number of time steps, if running noise simulations (+/- branches)
- if ( NOISE_TOMOGRAPHY /= 0 ) NSTEP = 2*NSTEP-1
-
- ! the default value of NTSTEP_BETWEEN_READ_ADJSRC (0) is to read the whole trace at the same time
- if ( NTSTEP_BETWEEN_READ_ADJSRC == 0 ) NTSTEP_BETWEEN_READ_ADJSRC = NSTEP
-
- ! total times steps must be dividable by adjoint source chunks/blocks
- if ( mod(NSTEP,NTSTEP_BETWEEN_READ_ADJSRC) /= 0 ) then
- print*,'error: mod(NSTEP,NTSTEP_BETWEEN_READ_ADJSRC) must be zero!'
- print*,' change your Par_file (when NOISE_TOMOGRAPHY is not equal to zero, ACTUAL_NSTEP=2*NSTEP-1)'
- stop 'mod(NSTEP,NTSTEP_BETWEEN_READ_ADJSRC) must be zero!'
- endif
-
- ! for noise simulations, we need to save movies at the surface (where the noise is generated)
- ! and thus we force MOVIE_SURFACE to be .true., in order to use variables defined for surface movies later
- if ( NOISE_TOMOGRAPHY /= 0 ) then
- MOVIE_SURFACE = .true.
- CREATE_SHAKEMAP = .false. ! CREATE_SHAKEMAP and MOVIE_SURFACE cannot be both .true.
- USE_HIGHRES_FOR_MOVIES = .true. ! we need to save surface movie everywhere, i.e. at all GLL points on the surface
- ! since there are several flags involving surface movies, check compatability
- if ( EXTERNAL_MESH_MOVIE_SURFACE .or. EXTERNAL_MESH_CREATE_SHAKEMAP ) then
- print*, 'error: when running noise simulations ( NOISE_TOMOGRAPHY /= 0 ),'
- print*, ' we can NOT use EXTERNAL_MESH_MOVIE_SURFACE or EXTERNAL_MESH_CREATE_SHAKEMAP'
- print*, ' change EXTERNAL_MESH_MOVIE_SURFACE & EXTERNAL_MESH_CREATE_SHAKEMAP in constant.h'
- stop 'incompatible NOISE_TOMOGRAPHY, EXTERNAL_MESH_MOVIE_SURFACE, EXTERNAL_MESH_CREATE_SHAKEMAP'
- endif
- endif
-
- ! compute the total number of sources in the CMTSOLUTION file
- ! there are NLINES_PER_CMTSOLUTION_SOURCE lines per source in that file
- call get_value_string(CMTSOLUTION, 'solver.CMTSOLUTION',&
- IN_DATA_FILES_PATH(1:len_trim(IN_DATA_FILES_PATH))//'CMTSOLUTION')
-
- open(unit=1,file=CMTSOLUTION,iostat=ios,status='old',action='read')
- if(ios /= 0) stop 'error opening CMTSOLUTION file'
-
- icounter = 0
- do while(ios == 0)
- read(1,"(a)",iostat=ios) dummystring
- if(ios == 0) icounter = icounter + 1
- enddo
- close(1)
-
- if(mod(icounter,NLINES_PER_CMTSOLUTION_SOURCE) /= 0) &
- stop 'total number of lines in CMTSOLUTION file should be a multiple of NLINES_PER_CMTSOLUTION_SOURCE'
-
- NSOURCES = icounter / NLINES_PER_CMTSOLUTION_SOURCE
- if(NSOURCES < 1) stop 'need at least one source in CMTSOLUTION file'
-
- ! compute the minimum value of hdur in CMTSOLUTION file
- open(unit=1,file=CMTSOLUTION,status='old',action='read')
- minval_hdur = HUGEVAL
- do isource = 1,NSOURCES
-
- ! skip other information
- do idummy = 1,3
- read(1,"(a)") dummystring
- enddo
-
- ! read half duration and compute minimum
- read(1,"(a)") dummystring
- read(dummystring(15:len_trim(dummystring)),*) hdur
- minval_hdur = min(minval_hdur,hdur)
-
- ! skip other information
- do idummy = 1,9
- read(1,"(a)") dummystring
- enddo
-
- enddo
- close(1)
-
-! one cannot use a Heaviside source for the movies
- if((MOVIE_SURFACE .or. MOVIE_VOLUME) .and. sqrt(minval_hdur**2 + HDUR_MOVIE**2) < TINYVAL) &
- stop 'hdur too small for movie creation, movies do not make sense for Heaviside source'
-
-! close parameter file
- call close_parameter_file()
-
- end subroutine read_parameter_file
-
-
-subroutine read_gpu_mode(GPU_MODE)
-
- implicit none
- include "constants.h"
-
- logical GPU_MODE
- ! opens file Par_file
-
- call open_parameter_file()
-
- call read_value_logical(GPU_MODE, 'solver.GPU_MODE')
-
- ! close parameter file
- call close_parameter_file()
-
-end subroutine read_gpu_mode
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+ subroutine read_parameter_file( NPROC,NTSTEP_BETWEEN_OUTPUT_SEISMOS,NSTEP,DT, &
+ UTM_PROJECTION_ZONE,SUPPRESS_UTM_PROJECTION, &
+ ATTENUATION,USE_OLSEN_ATTENUATION,LOCAL_PATH,NSOURCES, &
+ OCEANS,TOPOGRAPHY,ANISOTROPY,ABSORBING_CONDITIONS, &
+ MOVIE_SURFACE,MOVIE_VOLUME,CREATE_SHAKEMAP,SAVE_DISPLACEMENT, &
+ NTSTEP_BETWEEN_FRAMES,USE_HIGHRES_FOR_MOVIES,HDUR_MOVIE, &
+ SAVE_MESH_FILES,PRINT_SOURCE_TIME_FUNCTION,NTSTEP_BETWEEN_OUTPUT_INFO, &
+ SIMULATION_TYPE,SAVE_FORWARD, &
+ NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY )
+
+ implicit none
+
+ include "constants.h"
+
+ integer NPROC,NTSTEP_BETWEEN_OUTPUT_SEISMOS,NSTEP,SIMULATION_TYPE, NTSTEP_BETWEEN_READ_ADJSRC
+ integer NSOURCES,NTSTEP_BETWEEN_FRAMES,NTSTEP_BETWEEN_OUTPUT_INFO,UTM_PROJECTION_ZONE
+ integer NOISE_TOMOGRAPHY
+
+ double precision DT,HDUR_MOVIE
+
+ logical ATTENUATION,USE_OLSEN_ATTENUATION,OCEANS,TOPOGRAPHY,ABSORBING_CONDITIONS,SAVE_FORWARD
+ logical MOVIE_SURFACE,MOVIE_VOLUME,CREATE_SHAKEMAP,SAVE_DISPLACEMENT,USE_HIGHRES_FOR_MOVIES
+ logical ANISOTROPY,SAVE_MESH_FILES,PRINT_SOURCE_TIME_FUNCTION,SUPPRESS_UTM_PROJECTION
+
+ character(len=256) LOCAL_PATH,CMTSOLUTION
+
+! local variables
+ integer ::ios,icounter,isource,idummy,nproc_eta_old,nproc_xi_old
+ double precision :: hdur,minval_hdur
+ character(len=256) :: dummystring
+ integer, external :: err_occurred
+
+ ! opens file Par_file
+ call open_parameter_file()
+
+ ! reads in parameters
+ call read_value_integer(SIMULATION_TYPE, 'solver.SIMULATION_TYPE')
+ if(err_occurred() /= 0) return
+ call read_value_integer(NOISE_TOMOGRAPHY, 'solver.NOISE_TOMOGRAPHY')
+ if(err_occurred() /= 0) return
+ call read_value_logical(SAVE_FORWARD, 'solver.SAVE_FORWARD')
+ if(err_occurred() /= 0) return
+ call read_value_integer(UTM_PROJECTION_ZONE, 'mesher.UTM_PROJECTION_ZONE')
+ if(err_occurred() /= 0) return
+ call read_value_logical(SUPPRESS_UTM_PROJECTION, 'mesher.SUPPRESS_UTM_PROJECTION')
+ if(err_occurred() /= 0) return
+ ! total number of processors
+ call read_value_integer(NPROC, 'mesher.NPROC')
+ if(err_occurred() /= 0) then
+ ! checks if it's using an old Par_file format
+ call read_value_integer(nproc_eta_old, 'mesher.NPROC_ETA')
+ if( err_occurred() /= 0 ) then
+ print*,'please specify the number of processes in Par_file as:'
+ print*,'NPROC = <my_number_of_desired_processes> '
+ return
+ endif
+ ! checks if it's using an old Par_file format
+ call read_value_integer(nproc_xi_old, 'mesher.NPROC_XI')
+ if( err_occurred() /= 0 ) then
+ print*,'please specify the number of processes in Par_file as:'
+ print*,'NPROC = <my_number_of_desired_processes> '
+ return
+ endif
+ NPROC = nproc_eta_old * nproc_xi_old
+ endif
+ call read_value_integer(NSTEP, 'solver.NSTEP')
+ if(err_occurred() /= 0) return
+ call read_value_double_precision(DT, 'solver.DT')
+ if(err_occurred() /= 0) return
+ call read_value_logical(OCEANS, 'model.OCEANS')
+ if(err_occurred() /= 0) return
+ call read_value_logical(TOPOGRAPHY, 'model.TOPOGRAPHY')
+ if(err_occurred() /= 0) return
+ call read_value_logical(ATTENUATION, 'model.ATTENUATION')
+ if(err_occurred() /= 0) return
+ call read_value_logical(USE_OLSEN_ATTENUATION, 'model.USE_OLSEN_ATTENUATION')
+ if(err_occurred() /= 0) return
+ call read_value_logical(ANISOTROPY, 'model.ANISOTROPY')
+ if(err_occurred() /= 0) return
+ call read_value_logical(ABSORBING_CONDITIONS, 'solver.ABSORBING_CONDITIONS')
+ if(err_occurred() /= 0) return
+ call read_value_logical(MOVIE_SURFACE, 'solver.MOVIE_SURFACE')
+ if(err_occurred() /= 0) return
+ call read_value_logical(MOVIE_VOLUME, 'solver.MOVIE_VOLUME')
+ if(err_occurred() /= 0) return
+ call read_value_integer(NTSTEP_BETWEEN_FRAMES, 'solver.NTSTEP_BETWEEN_FRAMES')
+ if(err_occurred() /= 0) return
+ call read_value_logical(CREATE_SHAKEMAP, 'solver.CREATE_SHAKEMAP')
+ if(err_occurred() /= 0) return
+ call read_value_logical(SAVE_DISPLACEMENT, 'solver.SAVE_DISPLACEMENT')
+ if(err_occurred() /= 0) return
+ call read_value_logical(USE_HIGHRES_FOR_MOVIES, 'solver.USE_HIGHRES_FOR_MOVIES')
+ if(err_occurred() /= 0) return
+ call read_value_double_precision(HDUR_MOVIE, 'solver.HDUR_MOVIE')
+ if(err_occurred() /= 0) return
+ call read_value_logical(SAVE_MESH_FILES, 'mesher.SAVE_MESH_FILES')
+ if(err_occurred() /= 0) return
+ call read_value_string(LOCAL_PATH, 'LOCAL_PATH')
+ if(err_occurred() /= 0) return
+ call read_value_integer(NTSTEP_BETWEEN_OUTPUT_INFO, 'solver.NTSTEP_BETWEEN_OUTPUT_INFO')
+ if(err_occurred() /= 0) return
+ call read_value_integer(NTSTEP_BETWEEN_OUTPUT_SEISMOS, 'solver.NTSTEP_BETWEEN_OUTPUT_SEISMOS')
+ if(err_occurred() /= 0) return
+ call read_value_integer(NTSTEP_BETWEEN_READ_ADJSRC, 'solver.NTSTEP_BETWEEN_READ_ADJSRC')
+ if(err_occurred() /= 0) return
+ call read_value_logical(PRINT_SOURCE_TIME_FUNCTION, 'solver.PRINT_SOURCE_TIME_FUNCTION')
+ if(err_occurred() /= 0) return
+
+ ! noise simulations:
+ ! double the number of time steps, if running noise simulations (+/- branches)
+ if ( NOISE_TOMOGRAPHY /= 0 ) NSTEP = 2*NSTEP-1
+
+ ! the default value of NTSTEP_BETWEEN_READ_ADJSRC (0) is to read the whole trace at the same time
+ if ( NTSTEP_BETWEEN_READ_ADJSRC == 0 ) NTSTEP_BETWEEN_READ_ADJSRC = NSTEP
+
+ ! total times steps must be dividable by adjoint source chunks/blocks
+ if ( mod(NSTEP,NTSTEP_BETWEEN_READ_ADJSRC) /= 0 ) then
+ print*,'error: mod(NSTEP,NTSTEP_BETWEEN_READ_ADJSRC) must be zero!'
+ print*,' change your Par_file (when NOISE_TOMOGRAPHY is not equal to zero, ACTUAL_NSTEP=2*NSTEP-1)'
+ stop 'mod(NSTEP,NTSTEP_BETWEEN_READ_ADJSRC) must be zero!'
+ endif
+
+ ! for noise simulations, we need to save movies at the surface (where the noise is generated)
+ ! and thus we force MOVIE_SURFACE to be .true., in order to use variables defined for surface movies later
+ if ( NOISE_TOMOGRAPHY /= 0 ) then
+ MOVIE_SURFACE = .true.
+ CREATE_SHAKEMAP = .false. ! CREATE_SHAKEMAP and MOVIE_SURFACE cannot be both .true.
+ USE_HIGHRES_FOR_MOVIES = .true. ! we need to save surface movie everywhere, i.e. at all GLL points on the surface
+ ! since there are several flags involving surface movies, check compatability
+ if ( EXTERNAL_MESH_MOVIE_SURFACE .or. EXTERNAL_MESH_CREATE_SHAKEMAP ) then
+ print*, 'error: when running noise simulations ( NOISE_TOMOGRAPHY /= 0 ),'
+ print*, ' we can NOT use EXTERNAL_MESH_MOVIE_SURFACE or EXTERNAL_MESH_CREATE_SHAKEMAP'
+ print*, ' change EXTERNAL_MESH_MOVIE_SURFACE & EXTERNAL_MESH_CREATE_SHAKEMAP in constant.h'
+ stop 'incompatible NOISE_TOMOGRAPHY, EXTERNAL_MESH_MOVIE_SURFACE, EXTERNAL_MESH_CREATE_SHAKEMAP'
+ endif
+ endif
+
+ ! compute the total number of sources in the CMTSOLUTION file
+ ! there are NLINES_PER_CMTSOLUTION_SOURCE lines per source in that file
+ call get_value_string(CMTSOLUTION, 'solver.CMTSOLUTION',&
+ IN_DATA_FILES_PATH(1:len_trim(IN_DATA_FILES_PATH))//'CMTSOLUTION')
+
+ open(unit=1,file=CMTSOLUTION,iostat=ios,status='old',action='read')
+ if(ios /= 0) stop 'error opening CMTSOLUTION file'
+
+ icounter = 0
+ do while(ios == 0)
+ read(1,"(a)",iostat=ios) dummystring
+ if(ios == 0) icounter = icounter + 1
+ enddo
+ close(1)
+
+ if(mod(icounter,NLINES_PER_CMTSOLUTION_SOURCE) /= 0) &
+ stop 'total number of lines in CMTSOLUTION file should be a multiple of NLINES_PER_CMTSOLUTION_SOURCE'
+
+ NSOURCES = icounter / NLINES_PER_CMTSOLUTION_SOURCE
+ if(NSOURCES < 1) stop 'need at least one source in CMTSOLUTION file'
+
+ ! compute the minimum value of hdur in CMTSOLUTION file
+ open(unit=1,file=CMTSOLUTION,status='old',action='read')
+ minval_hdur = HUGEVAL
+ do isource = 1,NSOURCES
+
+ ! skip other information
+ do idummy = 1,3
+ read(1,"(a)") dummystring
+ enddo
+
+ ! read half duration and compute minimum
+ read(1,"(a)") dummystring
+ read(dummystring(15:len_trim(dummystring)),*) hdur
+ minval_hdur = min(minval_hdur,hdur)
+
+ ! skip other information
+ do idummy = 1,9
+ read(1,"(a)") dummystring
+ enddo
+
+ enddo
+ close(1)
+
+! one cannot use a Heaviside source for the movies
+ if((MOVIE_SURFACE .or. MOVIE_VOLUME) .and. sqrt(minval_hdur**2 + HDUR_MOVIE**2) < TINYVAL) &
+ stop 'hdur too small for movie creation, movies do not make sense for Heaviside source'
+
+! close parameter file
+ call close_parameter_file()
+
+ end subroutine read_parameter_file
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine read_gpu_mode(GPU_MODE)
+
+ implicit none
+ include "constants.h"
+
+ logical GPU_MODE
+ ! opens file Par_file
+
+ call open_parameter_file()
+
+ call read_value_logical(GPU_MODE, 'solver.GPU_MODE')
+
+ ! close parameter file
+ call close_parameter_file()
+
+ end subroutine read_gpu_mode
+
+
Deleted: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/save_and_compare_cpu_vs_gpu.c
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/save_and_compare_cpu_vs_gpu.c 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/save_and_compare_cpu_vs_gpu.c 2011-10-06 03:31:24 UTC (rev 19027)
@@ -1,274 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <errno.h>
-#include <mpi.h>
-#define MAX(a, b) (((a) > (b)) ? (a) : (b))
-
-void save_to_max_surface_file_(float* maxval) {
- int rank;
- char filename[BUFSIZ];
- FILE* fp;
- MPI_Comm_rank(MPI_COMM_WORLD,&rank);
- sprintf(filename,"maxval_surface_proc_%03d.dat",rank);
- fp = fopen(filename,"a+");
- fprintf(fp,"%e\n",*maxval);
- fclose(fp);
-}
-
-void save_fvector_(float* vector, int* size, int* id, int* cpu_or_gpu) {
- FILE* fp;
- char filename[BUFSIZ];
- if(*cpu_or_gpu == 0) {
- sprintf(filename, "debug_output_cpu_%d.dat",*id);
- }
- else {
- sprintf(filename, "debug_output_gpu_%d.dat",*id);
- }
- fp = fopen(filename, "wb");
- printf("writing vector, vector[0]=%e\n",vector[0]);
- fwrite(vector, sizeof(float), *size, fp);
- fclose(fp);
-
-}
-
-void save_ivector_(int* vector, int* size, int* id, int* cpu_or_gpu) {
- FILE* fp;
- char filename[BUFSIZ];
- if(*cpu_or_gpu == 0) {
- sprintf(filename, "debug_output_cpu_%d.dat",*id);
- }
- else {
- sprintf(filename, "debug_output_gpu_%d.dat",*id);
- }
- fp = fopen(filename, "wb");
- fwrite(vector, sizeof(int), *size, fp);
- fclose(fp);
-
-}
-
-
-void get_max_from_surface_file_(int* nodes_per_iterationf,int* NSTEP) {
- int nodes_per_iteration = *nodes_per_iterationf;
- char filename[BUFSIZ];
- int procid;
- MPI_Comm_rank(MPI_COMM_WORLD,&procid);
- sprintf(filename,"/scratch/eiger/rietmann/SPECFEM3D_AIGLE/in_out_files/DATABASES_MPI/proc%06d_surface_movie",procid);
-
- FILE* fp; int it;
- printf("Opening %s for analysis\n",filename);
- fp = fopen(filename,"rb");
- char* errorstr;
- if(fp == 0) {
- errorstr = strerror(errno);
- printf("FILE ERROR:%s\n",errorstr);
- perror("file error\n");
- exit(1);
- }
-
- float* vector = (float*)malloc(nodes_per_iteration*sizeof(float));
- float max_val;
- int i;
- for(it=0;it<*NSTEP;it++) {
- int pos = (sizeof(float)*nodes_per_iteration)*(it);
- fseek(fp,pos,SEEK_SET);
- fread(vector,sizeof(float),nodes_per_iteration,fp);
- for(i=0;i<nodes_per_iteration;i++) {
- max_val = MAX(max_val,vector[i]);
- }
- if(it % 500 == 0) {
- printf("scanning it=%d\n",it);
- }
- }
- printf("max_val=%e\n",max_val);
-}
-
-void compare_two_vectors_exact_(int* sizef,float* vector1,float* vector2,int* num_errors) {
-
- int size = *sizef;
- int i;
- int error_count = 0;
-
- for(i=0;i<size;++i) {
- if(vector1[i] != vector2[i]) {
- error_count++;
- if(error_count < 10) {
- printf("err[%d]: %e != %e\n",i,vector1[i],vector2[i]);
- }
- }
- }
- printf("**** Error Count: %d ****\n",error_count);
- *num_errors = error_count;
-}
-
-void compare_two_vectors_(int* sizef,float* vector1,float* vector2,int* num_errors) {
-
- int size = *sizef;
- int i;
- int error_count = 0;
- for(i=0;i<size;++i) {
- if(vector1[i] != 0) {
- if( fabsf(vector1[i]-vector2[i])/vector1[i] > 0.01) {
- if(fabsf(vector1[i]-vector2[i]) > 1e-20) {
- error_count++;
- if(error_count<10) {
- printf("err[%d]: %e != %e\n",i,vector1[i],vector2[i]);
- }
- }
- }
- }
- /* if(vector1[i] != vector2[i]) { */
- /* if(fabsf(vector1[i]-vector2[i]) > 1e-25) { */
- /* error_count++; */
- /* if(error_count<50) { */
- /* printf("err[%d]: %e != %e\n",i,vector1[i],vector2[i]); */
- /* } */
- /* } */
- /* } */
- }
- printf("**** Error Count: %d ****\n",error_count);
- *num_errors = error_count;
-}
-
-void compare_surface_files_(int* bytes_per_iteration, int* number_of_iterations) {
-
- char* cpu_file = "/scratch/eiger/rietmann/SPECFEM3D/in_out_files/DATABASES_MPI/cpu_proc000001_surface_movie";
- char* gpu_file = "/scratch/eiger/rietmann/SPECFEM3D/in_out_files/DATABASES_MPI/cpu_v2_proc000001_surface_movie";
-
- FILE* fp_cpu;
- fp_cpu = fopen(cpu_file,"rb");
- char* errorstr;
- if(fp_cpu == 0) {
- errorstr = strerror(errno);
- printf("CPU FILE ERROR:%s\n",errorstr);
- perror("cpu file error\n");
- }
- FILE* fp_gpu;
- fp_gpu = fopen(gpu_file,"rb");
-
- if(fp_gpu == NULL) {
- errorstr = strerror(errno);
- printf("GPU FILE ERROR:%s\n",errorstr);
- perror("gpu file error\n");
- }
-
- /* pause_for_debug(); */
-
- float* gpu_vector = (float*)malloc(*bytes_per_iteration);
- float* cpu_vector = (float*)malloc(*bytes_per_iteration);
- int i,it,error_count=0;
- for(it=0;it<*number_of_iterations;it++) {
- int pos = (*bytes_per_iteration)*(it);
-
- fseek(fp_cpu,pos,SEEK_SET);
- fseek(fp_gpu,pos,SEEK_SET);
-
- int number_of_nodes = *bytes_per_iteration/sizeof(float);
- fread(cpu_vector,sizeof(float),number_of_nodes,fp_cpu);
- fread(gpu_vector,sizeof(float),number_of_nodes,fp_gpu);
- int size = number_of_nodes;
- float gpu_min_val=10;
- float gpu_max_val=0;
- float cpu_min_val=10;
- float cpu_max_val=0;
- if(it<100) {
- for(i=0;i<size;i++) {
- if((fabs(cpu_vector[i] - gpu_vector[i])/(fabs(cpu_vector[i])+1e-31) > 0.01)) {
- if(error_count < 30) printf("ERROR[%d]: %g != %g\n",i,cpu_vector[i], gpu_vector[i]);
- if(cpu_vector[i] > 1e-30) error_count++;
- }
- if(gpu_vector[i]>gpu_max_val) gpu_max_val = gpu_vector[i];
- if(gpu_vector[i]<gpu_min_val) gpu_min_val = gpu_vector[i];
- if(cpu_vector[i]>cpu_max_val) cpu_max_val = cpu_vector[i];
- if(cpu_vector[i]<cpu_min_val) cpu_min_val = cpu_vector[i];
- }
- printf("%d Total Errors\n",error_count);
- printf("size:%d\n",size);
- printf("GPU:[min/max]=%e/%e\n",gpu_min_val,gpu_max_val);
- printf("CPU:[min/max]=%e/%e\n",cpu_min_val,cpu_max_val);
- }
- }
- printf("End of Surface Compare\n");
- exit(1);
-}
-
-
-void compare_fvector_(float* vector, int* size, int* id, int* cpu_or_gpu) {
- FILE* fp;
- char cmp_filename[BUFSIZ];
- float* compare_vector = (float*)malloc(*size*sizeof(float));
- if(*cpu_or_gpu == 0) { //swap gpu/cpu for compare
- sprintf(cmp_filename, "debug_output_gpu_%d.dat",*id);
- }
- else {
- sprintf(cmp_filename, "debug_output_cpu_%d.dat",*id);
- }
- fopen(cmp_filename, "rb");
- /* read the values */
- if((fp=fopen(cmp_filename, "rb"))==NULL) {
- printf("Cannot open comparison file %s.\n",cmp_filename);
- exit(1);
- }
- if(fread(compare_vector, sizeof(float), *size, fp) != *size) {
- if(feof(fp))
- printf("Premature end of file.");
- else
- printf("File read error.");
- }
-
- fclose(fp);
-
- int i;
- int error_count=0;
- for(i=0;i<*size;i++) {
- if((fabs(vector[i] - compare_vector[i])/vector[i] > 0.0001)) {
- if(error_count < 30) {
- printf("ERROR[%d]: %g != %g\n",i,compare_vector[i], vector[i]);
- }
- error_count++;
- /* if(compare_vector[i] > 1e-30) error_count++; */
- }
- }
- printf("%d Total Errors\n",error_count);
- printf("size:%d\n",*size);
- /* for(i=0;i<30;i++) { */
- /* printf("val[%d]: %g != %g\n",i,compare_vector[i], vector[i]); */
- /* /\* printf("error_check[%d]= %g\n",abs(vector[i] - compare_vector[i])/vector[i]); *\/ */
- /* } */
-}
-
-void compare_ivector_(int* vector, int* size, int* id, int* cpu_or_gpu) {
- FILE* fp;
- char cmp_filename[BUFSIZ];
- int* compare_vector = (int*)malloc(*size*sizeof(int));
- if(*cpu_or_gpu == 0) { //swap gpu/cpu for compare
- sprintf(cmp_filename, "debug_output_gpu_%d.dat",*id);
- }
- else {
- sprintf(cmp_filename, "debug_output_cpu_%d.dat",*id);
- }
- fopen(cmp_filename, "rb");
- /* read the values */
- if((fp=fopen(cmp_filename, "rb"))==NULL) {
- printf("Cannot open comparison file %s.\n",cmp_filename);
- exit(1);
- }
- if(fread(compare_vector, sizeof(int), *size, fp) != *size) {
- if(feof(fp))
- printf("Premature end of file.");
- else
- printf("File read error.");
- }
-
- fclose(fp);
-
- int i;
- int error_count=0;
- for(i=0;i<*size;i++) {
- if((abs(vector[i] - compare_vector[i])/vector[i] > 0.01) && error_count < 30) {
- printf("ERROR[%d]: %g != %g\n",i,compare_vector[i], vector[i]);
- error_count++;
- }
- }
- printf("%d Total Errors\n",error_count);
-}
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/Makefile.in
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/Makefile.in 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/Makefile.in 2011-10-06 03:31:24 UTC (rev 19027)
@@ -29,20 +29,32 @@
# @configure_input@
+## example:
# CUDA_LIBS = -L/apps/eiger/Cuda-4.0/cuda/lib64 -lcuda -lcudart -lcublas
# CUDA_INC = -I/apps/eiger/Cuda-4.0/cuda/include
# MPI_INC = -I/apps/eiger/mvapich2/1.5.1p1/mvapich2-gnu/include
+##
+#CUDA_LIBS= -L/u/dpeter/install/cuda/lib64 -lcudart -lcublas
+#MPI_INC= -I/usr/local/openmpi/1.4.3/gcc/x86_64/include
# MR added CUDA
@COND_CUDA_TRUE at CUDA_LIBS = -lcuda -lcudart -lcublas
@COND_CUDA_FALSE at CUDA_LIBS =
+# with configure: ./configure CUDA_LIB=.. CUDA_INC=.. MPI_INC=..
CUDA_LIB_LOCATION = @CUDA_LIB@
CUDA_LINK = $(CUDA_LIB_LOCATION) $(CUDA_LIBS)
-CUDA_INC = @CUDA_INC@
+CUDA_INC = @CUDA_INC@ -I../../
MPI_INC = @MPI_INC@
+ at COND_CUDA_TRUE@NVCC = nvcc
+ at COND_CUDA_FALSE@NVCC = g++
+
+ at COND_CUDA_TRUE@NVCC_FLAGS = $(CUDA_INC) $(MPI_INC) -DCUDA -gencode=arch=compute_20,code=sm_20
+ at COND_CUDA_FALSE@NVCC_FLAGS = $(MPI_INC)
+
+
FC = @FC@
FCFLAGS = #@FCFLAGS@
MPIFC = @MPIFC@
@@ -51,13 +63,10 @@
FLAGS_NO_CHECK = @FLAGS_NO_CHECK@
FCFLAGS_f90 = @FCFLAGS_f90@
- at COND_CUDA_TRUE@NVCC = nvcc
- at COND_CUDA_FALSE@NVCC = g++
- at COND_CUDA_TRUE@NVCC_FLAGS = $(CUDA_INC) $(MPI_INC) -DCUDA -gencode=arch=compute_20,code=sm_20
- at COND_CUDA_FALSE@NVCC_FLAGS = $(MPI_INC)
+SHARED = ../shared/
+CUDAD = ../cuda/
-SHARED = ../shared/
# E : executables directory
E = ../../bin
# O : objects directory
@@ -122,13 +131,28 @@
$O/write_VTK_data.shared.o \
$O/write_c_binary.cc.o
+CUDA_OBJECTS = \
+ $O/check_fields_cuda.cuda.o \
+ $O/compute_add_sources_cuda.cuda.o \
+ $O/compute_forces_acoustic_cuda.cuda.o \
+ $O/compute_forces_elastic_cuda.cuda.o \
+ $O/compute_kernels_cuda.cuda.o \
+ $O/compute_stacey_acoustic_cuda.cuda.o \
+ $O/compute_stacey_elastic_cuda.cuda.o \
+ $O/it_update_displacement_cuda.cuda.o \
+ $O/noise_tomography_cuda.cuda.o \
+ $O/prepare_mesh_constants_cuda.cuda.o \
+ $O/transfer_fields_cuda.cuda.o \
+ $O/write_seismograms_cuda.cuda.o \
+ $O/save_and_compare_cpu_vs_gpu.cudacc.o
+
# solver objects - no statically allocated arrays anymore
SOLVER_ARRAY_OBJECTS = \
$O/program_specfem3D.o \
$O/specfem3D_par.o \
$O/PML_init.o \
- $O/compute_boundary_kernel.shared.o \
- $O/compute_kernels.shared.o \
+ $O/compute_boundary_kernel.o \
+ $O/compute_kernels.o \
$O/compute_forces_acoustic.o \
$O/compute_forces_acoustic_pot.o \
$O/compute_forces_acoustic_PML.o \
@@ -160,16 +184,7 @@
$O/save_adjoint_kernels.o \
$O/specfem3D.o \
$O/assemble_MPI_vector.o \
- $O/noise_tomography.o \
- $O/it_update_displacement_cuda.cuda.o \
- $O/compute_forces_elastic_cuda.cuda.o \
- $O/compute_stacey_elastic_cuda.cuda.o \
- $O/compute_add_sources_cuda.cuda.o \
- $O/prepare_mesh_constants_cuda.cuda.o \
- $O/noise_tomography_cuda.cuda.o \
- $O/save_and_compare_cpu_vs_gpu.cc.o \
- $O/write_seismograms_cuda.cuda.o \
- $O/compute_kernels_cuda.cuda.o
+ $O/noise_tomography.o
# objects toggled between the parallel and serial version
@@ -179,8 +194,7 @@
LIBSPECFEM = $L/libspecfem.a
# objects for the pure Fortran version
- at COND_PYRE_FALSE@XGENERATE_DATABASES_OBJECTS = $O/program_generate_databases.o $(LIBSPECFEM)
- at COND_PYRE_FALSE@XSPECFEM_OBJECTS = $(SOLVER_ARRAY_OBJECTS) $(LIBSPECFEM)
+ at COND_PYRE_FALSE@XSPECFEM_OBJECTS = $(SOLVER_ARRAY_OBJECTS) $(LIBSPECFEM) $(CUDA_OBJECTS)
####
#### targets
@@ -188,7 +202,6 @@
# default targets for the pure Fortran version
@COND_PYRE_FALSE at DEFAULT = \
- at COND_PYRE_FALSE@ generate_databases \
@COND_PYRE_FALSE@ specfem3D \
@COND_PYRE_FALSE@ combine_vol_data \
@COND_PYRE_FALSE@ combine_surf_data \
@@ -209,9 +222,6 @@
# rules for the pure Fortran version
- at COND_PYRE_FALSE@xgenerate_databases: $(XGENERATE_DATABASES_OBJECTS) $(COND_MPI_OBJECTS)
- at COND_PYRE_FALSE@ ${FCLINK} -o ${E}/xgenerate_databases $(XGENERATE_DATABASES_OBJECTS) $(COND_MPI_OBJECTS) $(MPILIBS)
- at COND_PYRE_FALSE@
@COND_PYRE_FALSE@# solver also depends on values from mesher
@COND_PYRE_FALSE at xspecfem3D: $(XSPECFEM_OBJECTS) $(COND_MPI_OBJECTS)
@COND_PYRE_FALSE@ ${FCLINK} -o ${E}/xspecfem3D $(XSPECFEM_OBJECTS) $(COND_MPI_OBJECTS) $(MPILIBS) $(CUDA_LINK)
@@ -223,25 +233,25 @@
combine_surf_data: xcombine_surf_data
smooth_vol_data: xsmooth_vol_data
-xconvolve_source_timefunction: $O/convolve_source_timefunction.o
- ${FCCOMPILE_CHECK} -o ${E}/xconvolve_source_timefunction $O/convolve_source_timefunction.o
+xconvolve_source_timefunction: $O/convolve_source_timefunction.shared.o
+ ${FCCOMPILE_CHECK} -o ${E}/xconvolve_source_timefunction $O/convolve_source_timefunction.shared.o
- at COND_PYRE_FALSE@xcreate_movie_shakemap_AVS_DX_GMT: $O/create_movie_shakemap_AVS_DX_GMT.o $(LIBSPECFEM) $(OUTPUT)/surface_from_mesher.h
- at COND_PYRE_FALSE@ ${FCCOMPILE_CHECK} -o ${E}/xcreate_movie_shakemap_AVS_DX_GMT $O/create_movie_shakemap_AVS_DX_GMT.o $(LIBSPECFEM) -I$(OUTPUT)
+ at COND_PYRE_FALSE@xcreate_movie_shakemap_AVS_DX_GMT: $O/create_movie_shakemap_AVS_DX_GMT.shared.o $(LIBSPECFEM) $(OUTPUT)/surface_from_mesher.h
+ at COND_PYRE_FALSE@ ${FCCOMPILE_CHECK} -o ${E}/xcreate_movie_shakemap_AVS_DX_GMT $O/create_movie_shakemap_AVS_DX_GMT.shared.o $(LIBSPECFEM) -I$(OUTPUT)
-xcombine_vol_data: $O/combine_vol_data.o $O/write_c_binary.o $O/read_parameter_file.o $O/read_value_parameters.o $O/get_value_parameters.o $O/param_reader.o
- ${FCCOMPILE_CHECK} -o ${E}/xcombine_vol_data $O/combine_vol_data.o $O/write_c_binary.o $O/read_parameter_file.o $O/read_value_parameters.o $O/get_value_parameters.o $O/param_reader.o
+xcombine_vol_data: $O/combine_vol_data.shared.o $O/write_c_binary.cc.o $O/read_parameter_file.shared.o $O/read_value_parameters.shared.o $O/get_value_parameters.shared.o $O/param_reader.cc.o
+ ${FCCOMPILE_CHECK} -o ${E}/xcombine_vol_data $O/combine_vol_data.shared.o $O/write_c_binary.cc.o $O/read_parameter_file.shared.o $O/read_value_parameters.shared.o $O/get_value_parameters.shared.o $O/param_reader.cc.o
-xcombine_surf_data: $O/combine_surf_data.o $O/write_c_binary.o $O/param_reader.o
- ${FCCOMPILE_CHECK} -o ${E}/xcombine_surf_data $O/combine_surf_data.o $O/write_c_binary.o $O/param_reader.o
+xcombine_surf_data: $O/combine_surf_data.shared.o $O/write_c_binary.cc.o $O/param_reader.cc.o
+ ${FCCOMPILE_CHECK} -o ${E}/xcombine_surf_data $O/combine_surf_data.shared.o $O/write_c_binary.cc.o $O/param_reader.cc.o
-xsmooth_vol_data: $O/smooth_vol_data.o $O/read_parameter_file.o $O/read_value_parameters.o $O/get_value_parameters.o $O/param_reader.o $O/gll_library.o $O/exit_mpi.o $O/parallel.o
- ${FCLINK} -o ${E}/xsmooth_vol_data $O/smooth_vol_data.o $O/read_parameter_file.o $O/read_value_parameters.o $O/get_value_parameters.o $O/param_reader.o $O/gll_library.o $O/exit_mpi.o $O/parallel.o $(MPILIBS)
+xsmooth_vol_data: $O/smooth_vol_data.o $O/read_parameter_file.shared.o $O/read_value_parameters.shared.o $O/get_value_parameters.shared.o $O/param_reader.cc.o $O/gll_library.shared.o $O/exit_mpi.shared.o $O/parallel.o
+ ${FCLINK} -o ${E}/xsmooth_vol_data $O/smooth_vol_data.o $O/read_parameter_file.shared.o $O/read_value_parameters.shared.o $O/get_value_parameters.shared.o $O/param_reader.cc.o $O/gll_library.shared.o $O/exit_mpi.shared.o $O/parallel.o $(MPILIBS)
clean:
rm -f $O/* *.o *.gnu *.mod $(OUTPUT)/timestamp* $(OUTPUT)/starttime*txt work.pc* \
- xgenerate_databases xspecfem3D \
+ xspecfem3D \
xconvolve_source_timefunction \
xcreate_movie_shakemap_AVS_DX_GMT xcombine_vol_data xcombine_surf_data xsmooth_vol_data
@@ -263,35 +273,30 @@
###
$O/%.o: %.f90 $(SHARED)constants.h
- ${FCCOMPILE_NO_CHECK} -c -o $O/$@ $<
+ ${FCCOMPILE_NO_CHECK} -c -o $@ $<
$O/%.shared.o: ${SHARED}%.f90 $(SHARED)constants.h
- ${FCCOMPILE_NO_CHECK} -c -o $O/$@ $<
+ ${FCCOMPILE_NO_CHECK} -c -o $@ $<
-$O/%.cuda.o: %.cu
- $(NVCC) -c $< -o $O/$@ $(NVCC_FLAGS)
+###
+### CUDA compilation
+###
+$O/%.cuda.o: ${CUDAD}%.cu ../../config.h $(CUDAD)mesh_constants_cuda.h $(CUDAD)prepare_constants_cuda.h
+ $(NVCC) -c $< -o $@ $(NVCC_FLAGS)
+###
### C compilation
+###
force_ftz.o: ${SHARED}/force_ftz.c ../../config.h
${CC} -c $(CPPFLAGS) $(CFLAGS) -I../.. -o $O/force_ftz.o ${SHARED}/force_ftz.c
-
$O/%.cc.o: ${SHARED}/%.c ../../config.h
- ${CC} -c $(CFLAGS) $(MPI_INC) -o $O/$@ ${SHARED}/$< -I../../
+ ${CC} -c $(CFLAGS) $(MPI_INC) -o $@ ${SHARED}/$< -I../../
+$O/%.cudacc.o: ${CUDAD}/%.c ../../config.h
+ ${CC} -c $(CFLAGS) $(MPI_INC) -o $@ ${CUDAD}/$< -I../../
-###
-### C files below
-###
-# $O/param_reader.o: ${SHARED}/param_reader.c
-# ${CC} -c $(CFLAGS) -o $O/param_reader.o ${SHARED}/param_reader.c -I../../
-
-# $O/write_c_binary.o: ${SHARED}/write_c_binary.c
-# ${CC} -c $(CFLAGS) -o $O/write_c_binary.o ${SHARED}/write_c_binary.c -I../../
-
-
-
###
### MPI compilation without optimization
###
@@ -299,3 +304,6 @@
$O/parallel.o: $(SHARED)constants.h ${SHARED}/parallel.f90
${MPIFCCOMPILE_CHECK} -c -o $O/parallel.o ${SHARED}/parallel.f90
+$O/smooth_vol_data.o: $(SHARED)constants.h ${SHARED}/smooth_vol_data.f90
+ ${MPIFCCOMPILE_NO_CHECK} -c -o $O/smooth_vol_data.o ${SHARED}/smooth_vol_data.f90
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/assemble_MPI_vector.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/assemble_MPI_vector.f90 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/assemble_MPI_vector.f90 2011-10-06 03:31:24 UTC (rev 19027)
@@ -1,372 +1,509 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-!----
-!---- assemble the contributions between slices using non-blocking MPI
-!----
-
- subroutine assemble_MPI_vector_ext_mesh(NPROC,NGLOB_AB,array_val, &
- num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
- my_neighbours_ext_mesh)
-
- implicit none
-
- include "constants.h"
-
- integer :: NPROC
- integer :: NGLOB_AB
-
-! array to assemble
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: array_val
-
- integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
- integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh,my_neighbours_ext_mesh
- integer, dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
-
- ! local parameters
-
- ! send/receive temporary buffers
- !real(kind=CUSTOM_REAL), dimension(NDIM,max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: &
- ! buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh
- real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: buffer_send_vector_ext_mesh
- real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: buffer_recv_vector_ext_mesh
-
- ! requests
- !integer, dimension(num_interfaces_ext_mesh) :: request_send_vector_ext_mesh,request_recv_vector_ext_mesh
- integer, dimension(:), allocatable :: request_send_vector_ext_mesh
- integer, dimension(:), allocatable :: request_recv_vector_ext_mesh
-
- integer ipoin,iinterface,ier
-
-
-! here we have to assemble all the contributions between partitions using MPI
-
-! assemble only if more than one partition
- if(NPROC > 1) then
-
- allocate(buffer_send_vector_ext_mesh(NDIM,max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier)
- if( ier /= 0 ) stop 'error allocating array buffer_send_vector_ext_mesh'
- allocate(buffer_recv_vector_ext_mesh(NDIM,max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier)
- if( ier /= 0 ) stop 'error allocating array buffer_recv_vector_ext_mesh'
- allocate(request_send_vector_ext_mesh(num_interfaces_ext_mesh),stat=ier)
- if( ier /= 0 ) stop 'error allocating array request_send_vector_ext_mesh'
- allocate(request_recv_vector_ext_mesh(num_interfaces_ext_mesh),stat=ier)
- if( ier /= 0 ) stop 'error allocating array request_recv_vector_ext_mesh'
-
- ! partition border copy into the buffer
- do iinterface = 1, num_interfaces_ext_mesh
- do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
- buffer_send_vector_ext_mesh(:,ipoin,iinterface) = &
- array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface))
- enddo
- enddo
-
- ! send messages
- do iinterface = 1, num_interfaces_ext_mesh
- call isend_cr(buffer_send_vector_ext_mesh(1,1,iinterface), &
- NDIM*nibool_interfaces_ext_mesh(iinterface), &
- my_neighbours_ext_mesh(iinterface), &
- itag, &
- request_send_vector_ext_mesh(iinterface) &
- )
- call irecv_cr(buffer_recv_vector_ext_mesh(1,1,iinterface), &
- NDIM*nibool_interfaces_ext_mesh(iinterface), &
- my_neighbours_ext_mesh(iinterface), &
- itag, &
- request_recv_vector_ext_mesh(iinterface) &
- )
- enddo
-
- ! wait for communications completion (recv)
- do iinterface = 1, num_interfaces_ext_mesh
- call wait_req(request_recv_vector_ext_mesh(iinterface))
- enddo
-
- ! adding contributions of neighbours
- do iinterface = 1, num_interfaces_ext_mesh
- do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
- array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface)) = &
- array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface)) &
- + buffer_recv_vector_ext_mesh(:,ipoin,iinterface)
- enddo
- enddo
-
- ! wait for communications completion (send)
- do iinterface = 1, num_interfaces_ext_mesh
- call wait_req(request_send_vector_ext_mesh(iinterface))
- enddo
-
- deallocate(buffer_send_vector_ext_mesh)
- deallocate(buffer_recv_vector_ext_mesh)
- deallocate(request_send_vector_ext_mesh)
- deallocate(request_recv_vector_ext_mesh)
-
- endif
-
- end subroutine assemble_MPI_vector_ext_mesh
-
- !
- !-------------------------------------------------------------------------------------------------
- !
-
- subroutine assemble_MPI_vector_ext_mesh_s(NPROC,NGLOB_AB,array_val, &
- buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
- num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,my_neighbours_ext_mesh, &
- request_send_vector_ext_mesh,request_recv_vector_ext_mesh &
- )
-
- ! sends data
-
- implicit none
-
- include "constants.h"
-
- integer :: NPROC
- integer :: NGLOB_AB
-
- ! array to assemble
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: array_val
-
- integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
-
- real(kind=CUSTOM_REAL), dimension(NDIM,max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: &
- buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh
-
- integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh,my_neighbours_ext_mesh
- integer, dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
- integer, dimension(num_interfaces_ext_mesh) :: request_send_vector_ext_mesh,request_recv_vector_ext_mesh
-
- integer ipoin,iinterface
-
- ! here we have to assemble all the contributions between partitions using MPI
-
- ! assemble only if more than one partition
- if(NPROC > 1) then
-
- ! partition border copy into the buffer
- do iinterface = 1, num_interfaces_ext_mesh
- do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
- buffer_send_vector_ext_mesh(:,ipoin,iinterface) = &
- array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface))
- enddo
- enddo
-
- ! send messages
- do iinterface = 1, num_interfaces_ext_mesh
- call isend_cr(buffer_send_vector_ext_mesh(1,1,iinterface), &
- NDIM*nibool_interfaces_ext_mesh(iinterface), &
- my_neighbours_ext_mesh(iinterface), &
- itag, &
- request_send_vector_ext_mesh(iinterface) &
- )
- call irecv_cr(buffer_recv_vector_ext_mesh(1,1,iinterface), &
- NDIM*nibool_interfaces_ext_mesh(iinterface), &
- my_neighbours_ext_mesh(iinterface), &
- itag, &
- request_recv_vector_ext_mesh(iinterface) &
- )
- enddo
-
- endif
-
- end subroutine assemble_MPI_vector_ext_mesh_s
-
- subroutine assemble_MPI_vector_ext_mesh_send_cuda(NPROC,NGLOB_AB,array_val, &
- buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
- num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,my_neighbours_ext_mesh, &
- request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
-
- ! sends data
-
- implicit none
-
- include "constants.h"
-
- integer :: NPROC
- integer :: NGLOB_AB
-
- ! array to assemble
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: array_val
-
- integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
-
- real(kind=CUSTOM_REAL), dimension(NDIM,max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: &
- buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh
-
- integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh,my_neighbours_ext_mesh
- integer, dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
- integer, dimension(num_interfaces_ext_mesh) :: request_send_vector_ext_mesh,request_recv_vector_ext_mesh
-
- integer ipoin,iinterface
-
- ! here we have to assemble all the contributions between partitions using MPI
-
- ! assemble only if more than one partition
- if(NPROC > 1) then
-
- ! send messages
- do iinterface = 1, num_interfaces_ext_mesh
- call isend_cr(buffer_send_vector_ext_mesh(1,1,iinterface), &
- NDIM*nibool_interfaces_ext_mesh(iinterface), &
- my_neighbours_ext_mesh(iinterface), &
- itag, &
- request_send_vector_ext_mesh(iinterface) &
- )
- call irecv_cr(buffer_recv_vector_ext_mesh(1,1,iinterface), &
- NDIM*nibool_interfaces_ext_mesh(iinterface), &
- my_neighbours_ext_mesh(iinterface), &
- itag, &
- request_recv_vector_ext_mesh(iinterface) &
- )
- enddo
-
- endif
-
- end subroutine assemble_MPI_vector_ext_mesh_send_cuda
- !
- !-------------------------------------------------------------------------------------------------
- !
-
- subroutine assemble_MPI_vector_ext_mesh_w(NPROC,NGLOB_AB,array_val, &
- buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
- request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
-
-! waits for data to receive and assembles
-
- implicit none
-
- include "constants.h"
-
- integer :: NPROC
- integer :: NGLOB_AB
-
-! array to assemble
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: array_val
-
- integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
-
- real(kind=CUSTOM_REAL), dimension(NDIM,max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: &
- buffer_recv_vector_ext_mesh
-
- integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh
- integer, dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
- integer, dimension(num_interfaces_ext_mesh) :: request_send_vector_ext_mesh,request_recv_vector_ext_mesh
-
- integer ipoin,iinterface
-
-! here we have to assemble all the contributions between partitions using MPI
-
-! assemble only if more than one partition
- if(NPROC > 1) then
-
-! wait for communications completion (recv)
- do iinterface = 1, num_interfaces_ext_mesh
- call wait_req(request_recv_vector_ext_mesh(iinterface))
- enddo
-
-! adding contributions of neighbours
- do iinterface = 1, num_interfaces_ext_mesh
- do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
- array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface)) = &
- array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface)) &
- + buffer_recv_vector_ext_mesh(:,ipoin,iinterface)
- enddo
- enddo
-
-! wait for communications completion (send)
- do iinterface = 1, num_interfaces_ext_mesh
- call wait_req(request_send_vector_ext_mesh(iinterface))
- enddo
-
- endif
-
- end subroutine assemble_MPI_vector_ext_mesh_w
-
- subroutine assemble_MPI_vector_ext_mesh_write_cuda(NPROC,NGLOB_AB,array_val, Mesh_pointer, &
- buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
- request_send_vector_ext_mesh,request_recv_vector_ext_mesh,&
- FORWARD_OR_ADJOINT )
-
-! waits for data to receive and assembles
-
- implicit none
-
- include "constants.h"
-
- integer :: NPROC
- integer :: NGLOB_AB
- integer(kind=8) :: Mesh_pointer
-! array to assemble
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: array_val
-
- integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
-
- real(kind=CUSTOM_REAL), dimension(NDIM,max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: &
- buffer_recv_vector_ext_mesh
-
- integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh
- integer, dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
- integer, dimension(num_interfaces_ext_mesh) :: request_send_vector_ext_mesh,request_recv_vector_ext_mesh
-
- integer ipoin,iinterface
- integer FORWARD_OR_ADJOINT
-
-! here we have to assemble all the contributions between partitions using MPI
-
-! assemble only if more than one partition
- if(NPROC > 1) then
-
-! wait for communications completion (recv)
- do iinterface = 1, num_interfaces_ext_mesh
- call wait_req(request_recv_vector_ext_mesh(iinterface))
- enddo
-
-! adding contributions of neighbours
- call transfer_and_assemble_accel_to_device(Mesh_pointer, array_val, buffer_recv_vector_ext_mesh, &
- num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh, nibool_interfaces_ext_mesh,&
- ibool_interfaces_ext_mesh,FORWARD_OR_ADJOINT)
-
- ! This step is done via previous function transfer_and_assemble...
- ! do iinterface = 1, num_interfaces_ext_mesh
- ! do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
- ! array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface)) = &
- ! array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface)) + buffer_recv_vector_ext_mesh(:,ipoin,iinterface)
- ! enddo
- ! enddo
-
-! wait for communications completion (send)
- do iinterface = 1, num_interfaces_ext_mesh
- call wait_req(request_send_vector_ext_mesh(iinterface))
- enddo
-
- endif
-
- end subroutine assemble_MPI_vector_ext_mesh_write_cuda
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+!----
+!---- assemble the contributions between slices using non-blocking MPI
+!----
+
+ subroutine assemble_MPI_vector_ext_mesh(NPROC,NGLOB_AB,array_val, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ my_neighbours_ext_mesh)
+
+ implicit none
+
+ include "constants.h"
+
+ integer :: NPROC
+ integer :: NGLOB_AB
+
+! array to assemble
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: array_val
+
+ integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
+ integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh,my_neighbours_ext_mesh
+ integer, dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
+
+ ! local parameters
+
+ ! send/receive temporary buffers
+ !real(kind=CUSTOM_REAL), dimension(NDIM,max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: &
+ ! buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh
+ real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: buffer_send_vector_ext_mesh
+ real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: buffer_recv_vector_ext_mesh
+
+ ! requests
+ !integer, dimension(num_interfaces_ext_mesh) :: request_send_vector_ext_mesh,request_recv_vector_ext_mesh
+ integer, dimension(:), allocatable :: request_send_vector_ext_mesh
+ integer, dimension(:), allocatable :: request_recv_vector_ext_mesh
+
+ integer ipoin,iinterface,ier
+
+
+! here we have to assemble all the contributions between partitions using MPI
+
+! assemble only if more than one partition
+ if(NPROC > 1) then
+
+ allocate(buffer_send_vector_ext_mesh(NDIM,max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array buffer_send_vector_ext_mesh'
+ allocate(buffer_recv_vector_ext_mesh(NDIM,max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array buffer_recv_vector_ext_mesh'
+ allocate(request_send_vector_ext_mesh(num_interfaces_ext_mesh),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array request_send_vector_ext_mesh'
+ allocate(request_recv_vector_ext_mesh(num_interfaces_ext_mesh),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array request_recv_vector_ext_mesh'
+
+ ! partition border copy into the buffer
+ do iinterface = 1, num_interfaces_ext_mesh
+ do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
+ buffer_send_vector_ext_mesh(:,ipoin,iinterface) = &
+ array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface))
+ enddo
+ enddo
+
+ ! send messages
+ do iinterface = 1, num_interfaces_ext_mesh
+ call isend_cr(buffer_send_vector_ext_mesh(1,1,iinterface), &
+ NDIM*nibool_interfaces_ext_mesh(iinterface), &
+ my_neighbours_ext_mesh(iinterface), &
+ itag, &
+ request_send_vector_ext_mesh(iinterface) &
+ )
+ call irecv_cr(buffer_recv_vector_ext_mesh(1,1,iinterface), &
+ NDIM*nibool_interfaces_ext_mesh(iinterface), &
+ my_neighbours_ext_mesh(iinterface), &
+ itag, &
+ request_recv_vector_ext_mesh(iinterface) &
+ )
+ enddo
+
+ ! wait for communications completion (recv)
+ do iinterface = 1, num_interfaces_ext_mesh
+ call wait_req(request_recv_vector_ext_mesh(iinterface))
+ enddo
+
+ ! adding contributions of neighbours
+ do iinterface = 1, num_interfaces_ext_mesh
+ do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
+ array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface)) = &
+ array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface)) &
+ + buffer_recv_vector_ext_mesh(:,ipoin,iinterface)
+ enddo
+ enddo
+
+ ! wait for communications completion (send)
+ do iinterface = 1, num_interfaces_ext_mesh
+ call wait_req(request_send_vector_ext_mesh(iinterface))
+ enddo
+
+ deallocate(buffer_send_vector_ext_mesh)
+ deallocate(buffer_recv_vector_ext_mesh)
+ deallocate(request_send_vector_ext_mesh)
+ deallocate(request_recv_vector_ext_mesh)
+
+ endif
+
+ end subroutine assemble_MPI_vector_ext_mesh
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine assemble_MPI_vector_ext_mesh_s(NPROC,NGLOB_AB,array_val, &
+ buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,my_neighbours_ext_mesh, &
+ request_send_vector_ext_mesh,request_recv_vector_ext_mesh &
+ )
+
+ ! sends data
+
+ implicit none
+
+ include "constants.h"
+
+ integer :: NPROC
+ integer :: NGLOB_AB
+
+ ! array to assemble
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: array_val
+
+ integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
+
+ real(kind=CUSTOM_REAL), dimension(NDIM,max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: &
+ buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh
+
+ integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh,my_neighbours_ext_mesh
+ integer, dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
+ integer, dimension(num_interfaces_ext_mesh) :: request_send_vector_ext_mesh,request_recv_vector_ext_mesh
+
+ integer ipoin,iinterface
+
+ ! here we have to assemble all the contributions between partitions using MPI
+
+ ! assemble only if more than one partition
+ if(NPROC > 1) then
+
+ ! partition border copy into the buffer
+ do iinterface = 1, num_interfaces_ext_mesh
+ do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
+ buffer_send_vector_ext_mesh(:,ipoin,iinterface) = &
+ array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface))
+ enddo
+ enddo
+
+ ! send messages
+ do iinterface = 1, num_interfaces_ext_mesh
+ call isend_cr(buffer_send_vector_ext_mesh(1,1,iinterface), &
+ NDIM*nibool_interfaces_ext_mesh(iinterface), &
+ my_neighbours_ext_mesh(iinterface), &
+ itag, &
+ request_send_vector_ext_mesh(iinterface) &
+ )
+ call irecv_cr(buffer_recv_vector_ext_mesh(1,1,iinterface), &
+ NDIM*nibool_interfaces_ext_mesh(iinterface), &
+ my_neighbours_ext_mesh(iinterface), &
+ itag, &
+ request_recv_vector_ext_mesh(iinterface) &
+ )
+ enddo
+
+ endif
+
+ end subroutine assemble_MPI_vector_ext_mesh_s
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine assemble_MPI_vector_ext_mesh_send_cuda(NPROC, &
+ buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh, &
+ my_neighbours_ext_mesh, &
+ request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
+
+ ! sends data
+ ! note: array to assemble already filled into buffer_send_vector_ext_mesh array
+
+ implicit none
+
+ include "constants.h"
+
+ integer :: NPROC
+
+ integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
+
+ real(kind=CUSTOM_REAL), dimension(NDIM,max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: &
+ buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh
+
+ integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh,my_neighbours_ext_mesh
+ integer, dimension(num_interfaces_ext_mesh) :: request_send_vector_ext_mesh,request_recv_vector_ext_mesh
+
+ integer iinterface
+
+ ! here we have to assemble all the contributions between partitions using MPI
+
+ ! assemble only if more than one partition
+ if(NPROC > 1) then
+
+ ! send messages
+ do iinterface = 1, num_interfaces_ext_mesh
+ call isend_cr(buffer_send_vector_ext_mesh(1,1,iinterface), &
+ NDIM*nibool_interfaces_ext_mesh(iinterface), &
+ my_neighbours_ext_mesh(iinterface), &
+ itag, &
+ request_send_vector_ext_mesh(iinterface) &
+ )
+ call irecv_cr(buffer_recv_vector_ext_mesh(1,1,iinterface), &
+ NDIM*nibool_interfaces_ext_mesh(iinterface), &
+ my_neighbours_ext_mesh(iinterface), &
+ itag, &
+ request_recv_vector_ext_mesh(iinterface) &
+ )
+ enddo
+
+ endif
+
+ end subroutine assemble_MPI_vector_ext_mesh_send_cuda
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine assemble_MPI_vector_ext_mesh_w(NPROC,NGLOB_AB,array_val, &
+ buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
+
+! waits for data to receive and assembles
+
+ implicit none
+
+ include "constants.h"
+
+ integer :: NPROC
+ integer :: NGLOB_AB
+
+! array to assemble
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: array_val
+
+ integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
+
+ real(kind=CUSTOM_REAL), dimension(NDIM,max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: &
+ buffer_recv_vector_ext_mesh
+
+ integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh
+ integer, dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
+ integer, dimension(num_interfaces_ext_mesh) :: request_send_vector_ext_mesh,request_recv_vector_ext_mesh
+
+ integer ipoin,iinterface
+
+! here we have to assemble all the contributions between partitions using MPI
+
+! assemble only if more than one partition
+ if(NPROC > 1) then
+
+! wait for communications completion (recv)
+ do iinterface = 1, num_interfaces_ext_mesh
+ call wait_req(request_recv_vector_ext_mesh(iinterface))
+ enddo
+
+! adding contributions of neighbours
+ do iinterface = 1, num_interfaces_ext_mesh
+ do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
+ array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface)) = &
+ array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface)) &
+ + buffer_recv_vector_ext_mesh(:,ipoin,iinterface)
+ enddo
+ enddo
+
+! wait for communications completion (send)
+ do iinterface = 1, num_interfaces_ext_mesh
+ call wait_req(request_send_vector_ext_mesh(iinterface))
+ enddo
+
+ endif
+
+ end subroutine assemble_MPI_vector_ext_mesh_w
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine assemble_MPI_vector_ext_mesh_write_cuda(NPROC,NGLOB_AB,array_val, Mesh_pointer, &
+ buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ request_send_vector_ext_mesh,request_recv_vector_ext_mesh,&
+ FORWARD_OR_ADJOINT )
+
+! waits for data to receive and assembles
+
+ implicit none
+
+ include "constants.h"
+
+ integer :: NPROC
+ integer :: NGLOB_AB
+ integer(kind=8) :: Mesh_pointer
+! array to assemble
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: array_val
+
+ integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
+
+ real(kind=CUSTOM_REAL), dimension(NDIM,max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: &
+ buffer_recv_vector_ext_mesh
+
+ integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh
+ integer, dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
+ integer, dimension(num_interfaces_ext_mesh) :: request_send_vector_ext_mesh,request_recv_vector_ext_mesh
+
+ integer iinterface ! ipoin
+ integer FORWARD_OR_ADJOINT
+
+! here we have to assemble all the contributions between partitions using MPI
+
+! assemble only if more than one partition
+ if(NPROC > 1) then
+
+! wait for communications completion (recv)
+ do iinterface = 1, num_interfaces_ext_mesh
+ call wait_req(request_recv_vector_ext_mesh(iinterface))
+ enddo
+
+! adding contributions of neighbours
+ call transfer_and_assemble_accel_to_device(Mesh_pointer, array_val, buffer_recv_vector_ext_mesh, &
+ num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh, nibool_interfaces_ext_mesh,&
+ ibool_interfaces_ext_mesh,FORWARD_OR_ADJOINT)
+
+ ! This step is done via previous function transfer_and_assemble...
+ ! do iinterface = 1, num_interfaces_ext_mesh
+ ! do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
+ ! array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface)) = &
+ ! array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface)) + buffer_recv_vector_ext_mesh(:,ipoin,iinterface)
+ ! enddo
+ ! enddo
+
+! wait for communications completion (send)
+ do iinterface = 1, num_interfaces_ext_mesh
+ call wait_req(request_send_vector_ext_mesh(iinterface))
+ enddo
+
+ endif
+
+ end subroutine assemble_MPI_vector_ext_mesh_write_cuda
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine assemble_MPI_scalar_ext_mesh_send_cuda(NPROC, &
+ buffer_send_scalar_ext_mesh,buffer_recv_scalar_ext_mesh, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh, &
+ my_neighbours_ext_mesh, &
+ request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh)
+
+! non-blocking MPI send
+
+ ! sends data
+ ! note: assembling data already filled into buffer_send_scalar_ext_mesh array
+ implicit none
+
+ include "constants.h"
+
+ integer :: NPROC
+ integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
+
+ real(kind=CUSTOM_REAL), dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: &
+ buffer_send_scalar_ext_mesh,buffer_recv_scalar_ext_mesh
+
+ integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh,my_neighbours_ext_mesh
+ integer, dimension(num_interfaces_ext_mesh) :: request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh
+
+ integer iinterface
+
+! sends only if more than one partition
+ if(NPROC > 1) then
+
+ ! note: partition border copy into the buffer has already been done
+ ! by routine transfer_boundary_potential_from_device()
+
+ ! send messages
+ do iinterface = 1, num_interfaces_ext_mesh
+ ! non-blocking synchronous send request
+ call isend_cr(buffer_send_scalar_ext_mesh(1:nibool_interfaces_ext_mesh(iinterface),iinterface), &
+ nibool_interfaces_ext_mesh(iinterface), &
+ my_neighbours_ext_mesh(iinterface), &
+ itag, &
+ request_send_scalar_ext_mesh(iinterface) &
+ )
+ ! receive request
+ call irecv_cr(buffer_recv_scalar_ext_mesh(1:nibool_interfaces_ext_mesh(iinterface),iinterface), &
+ nibool_interfaces_ext_mesh(iinterface), &
+ my_neighbours_ext_mesh(iinterface), &
+ itag, &
+ request_recv_scalar_ext_mesh(iinterface) &
+ )
+
+ enddo
+
+ endif
+
+ end subroutine assemble_MPI_scalar_ext_mesh_send_cuda
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine assemble_MPI_scalar_ext_mesh_write_cuda(NPROC,NGLOB_AB,array_val, &
+ Mesh_pointer, &
+ buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh, &
+ max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh, &
+ FORWARD_OR_ADJOINT)
+
+! waits for send/receiver to be completed and assembles contributions
+
+ implicit none
+
+ include "constants.h"
+
+ integer :: NPROC
+ integer :: NGLOB_AB
+ integer(kind=8) :: Mesh_pointer
+
+ integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
+
+! array to assemble
+ real(kind=CUSTOM_REAL), dimension(NGLOB_AB) :: array_val
+
+
+ real(kind=CUSTOM_REAL), dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: &
+ buffer_recv_scalar_ext_mesh
+
+ integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh
+ integer, dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
+ integer, dimension(num_interfaces_ext_mesh) :: request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh
+
+ integer FORWARD_OR_ADJOINT
+
+ integer iinterface ! ipoin
+
+! assemble only if more than one partition
+ if(NPROC > 1) then
+
+ ! wait for communications completion (recv)
+ do iinterface = 1, num_interfaces_ext_mesh
+ call wait_req(request_recv_scalar_ext_mesh(iinterface))
+ enddo
+
+ ! adding contributions of neighbours
+ call transfer_and_assemble_potential_to_device(Mesh_pointer, array_val, buffer_recv_scalar_ext_mesh, &
+ num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh, nibool_interfaces_ext_mesh,&
+ ibool_interfaces_ext_mesh,FORWARD_OR_ADJOINT)
+
+ ! note: adding contributions of neighbours has been done just above for cuda
+ !do iinterface = 1, num_interfaces_ext_mesh
+ ! do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
+ ! array_val(ibool_interfaces_ext_mesh(ipoin,iinterface)) = &
+ ! array_val(ibool_interfaces_ext_mesh(ipoin,iinterface)) &
+ ! + buffer_recv_scalar_ext_mesh(ipoin,iinterface)
+ ! enddo
+ !enddo
+
+ ! wait for communications completion (send)
+ do iinterface = 1, num_interfaces_ext_mesh
+ call wait_req(request_send_scalar_ext_mesh(iinterface))
+ enddo
+
+ endif
+
+ end subroutine assemble_MPI_scalar_ext_mesh_write_cuda
+
+
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_acoustic.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_acoustic.f90 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_acoustic.f90 2011-10-06 03:31:24 UTC (rev 19027)
@@ -35,7 +35,8 @@
SIMULATION_TYPE,NSTEP,NGLOB_ADJOINT, &
nrec,islice_selected_rec,ispec_selected_rec, &
nadj_rec_local,adj_sourcearrays,b_potential_dot_dot_acoustic, &
- NTSTEP_BETWEEN_READ_ADJSRC )
+ NTSTEP_BETWEEN_READ_ADJSRC, &
+ GPU_MODE, Mesh_pointer )
use specfem_par,only: PRINT_SOURCE_TIME_FUNCTION,stf_used_total, &
xigll,yigll,zigll,xi_receiver,eta_receiver,gamma_receiver,&
@@ -73,6 +74,8 @@
!adjoint simulations
integer:: SIMULATION_TYPE,NSTEP,NGLOB_ADJOINT
+ logical:: GPU_MODE
+ integer(kind=8) :: Mesh_pointer
integer:: nrec
integer,dimension(nrec) :: islice_selected_rec,ispec_selected_rec
integer:: nadj_rec_local
@@ -88,7 +91,8 @@
real(kind=CUSTOM_REAL) stf_used,stf_used_total_all,time_source
integer :: isource,iglob,ispec,i,j,k,ier
integer :: irec_local,irec
-
+ double precision, dimension(NSOURCES) :: stf_pre_compute
+
! adjoint sources in SU format
integer :: it_start,it_end
real(kind=CUSTOM_REAL) :: adj_temp(NSTEP)
@@ -110,93 +114,118 @@
! forward simulations
if (SIMULATION_TYPE == 1) then
- ! adds acoustic sources
- do isource = 1,NSOURCES
+!way 2
+ if(GPU_MODE) then
+ if( NSOURCES > 0 ) then
+ do isource = 1,NSOURCES
+ if(USE_FORCE_POINT_SOURCE) then
+ ! precomputes source time function factor
+ stf_pre_compute(isource) = FACTOR_FORCE_SOURCE * comp_source_time_function_rickr( &
+ dble(it-1)*DT-t0-tshift_cmt(isource),hdur(isource))
+ else
+ stf_pre_compute(isource) = comp_source_time_function_gauss( &
+ dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+ endif
+ enddo
+ stf_used_total = stf_used_total + sum(stf_pre_compute(:))
+ ! only implements SIMTYPE=1 and NOISE_TOM=0
+ ! write(*,*) "fortran dt = ", dt
+ ! change dt -> DT
+ call compute_add_sources_acoustic_cuda(Mesh_pointer, phase_is_inner, &
+ NSOURCES, SIMULATION_TYPE, &
+ USE_FORCE_POINT_SOURCE, stf_pre_compute, myrank)
+ endif
+
+ else ! .NOT. GPU_MODE
- ! add the source (only if this proc carries the source)
- if(myrank == islice_selected_source(isource)) then
+ ! adds acoustic sources
+ do isource = 1,NSOURCES
- ispec = ispec_selected_source(isource)
+ ! add the source (only if this proc carries the source)
+ if(myrank == islice_selected_source(isource)) then
- if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
+ ispec = ispec_selected_source(isource)
- if( ispec_is_acoustic(ispec) ) then
+ if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
- if(USE_FORCE_POINT_SOURCE) then
+ if( ispec_is_acoustic(ispec) ) then
- ! note: for use_force_point_source xi/eta/gamma are in the range [1,NGLL*]
- iglob = ibool(nint(xi_source(isource)), &
- nint(eta_source(isource)), &
- nint(gamma_source(isource)), &
- ispec)
+ if(USE_FORCE_POINT_SOURCE) then
- f0 = hdur(isource) !! using hdur as a FREQUENCY just to avoid changing CMTSOLUTION file format
+ ! note: for use_force_point_source xi/eta/gamma are in the range [1,NGLL*]
+ iglob = ibool(nint(xi_source(isource)), &
+ nint(eta_source(isource)), &
+ nint(gamma_source(isource)), &
+ ispec)
- !if (it == 1 .and. myrank == 0) then
- ! write(IMAIN,*) 'using a source of dominant frequency ',f0
- ! write(IMAIN,*) 'lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
- ! write(IMAIN,*) 'lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
- !endif
+ f0 = hdur(isource) !! using hdur as a FREQUENCY just to avoid changing CMTSOLUTION file format
- ! gaussian source time function
- !stf_used = comp_source_time_function(dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+ !if (it == 1 .and. myrank == 0) then
+ ! write(IMAIN,*) 'using a source of dominant frequency ',f0
+ ! write(IMAIN,*) 'lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
+ ! write(IMAIN,*) 'lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
+ !endif
- ! we use nu_source(:,3) here because we want a source normal to the surface.
- ! This is the expression of a Ricker; should be changed according maybe to the Par_file.
- stf_used = FACTOR_FORCE_SOURCE * comp_source_time_function_rickr(dble(it-1)*DT-t0-tshift_cmt(isource),f0)
+ ! gaussian source time function
+ !stf_used = comp_source_time_function(dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
- ! beware, for acoustic medium, source is: pressure divided by Kappa of the fluid
- ! the sign is negative because pressure p = - Chi_dot_dot therefore we need
- ! to add minus the source to Chi_dot_dot to get plus the source in pressure:
+ ! we use nu_source(:,3) here because we want a source normal to the surface.
+ ! This is the expression of a Ricker; should be changed according maybe to the Par_file.
+ stf_used = FACTOR_FORCE_SOURCE * comp_source_time_function_rickr(dble(it-1)*DT-t0-tshift_cmt(isource),f0)
- ! acoustic source for pressure gets divided by kappa
- ! source contribution
- potential_dot_dot_acoustic(iglob) = potential_dot_dot_acoustic(iglob) &
- - stf_used / kappastore(nint(xi_source(isource)), &
- nint(eta_source(isource)), &
- nint(gamma_source(isource)),ispec)
+ ! beware, for acoustic medium, source is: pressure divided by Kappa of the fluid
+ ! the sign is negative because pressure p = - Chi_dot_dot therefore we need
+ ! to add minus the source to Chi_dot_dot to get plus the source in pressure:
- else
+ ! acoustic source for pressure gets divided by kappa
+ ! source contribution
+ potential_dot_dot_acoustic(iglob) = potential_dot_dot_acoustic(iglob) &
+ - stf_used / kappastore(nint(xi_source(isource)), &
+ nint(eta_source(isource)), &
+ nint(gamma_source(isource)),ispec)
- ! gaussian source time
- stf = comp_source_time_function_gauss(dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+ else
- ! quasi-heaviside
- !stf = comp_source_time_function(dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+ ! gaussian source time
+ stf = comp_source_time_function_gauss(dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
- ! distinguishes between single and double precision for reals
- if(CUSTOM_REAL == SIZE_REAL) then
- stf_used = sngl(stf)
- else
- stf_used = stf
- endif
+ ! quasi-heaviside
+ !stf = comp_source_time_function(dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
- ! beware, for acoustic medium, source is: pressure divided by Kappa of the fluid
- ! the sign is negative because pressure p = - Chi_dot_dot therefore we need
- ! to add minus the source to Chi_dot_dot to get plus the source in pressure
+ ! distinguishes between single and double precision for reals
+ if(CUSTOM_REAL == SIZE_REAL) then
+ stf_used = sngl(stf)
+ else
+ stf_used = stf
+ endif
- ! add source array
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- ! adds source contribution
- ! note: acoustic source for pressure gets divided by kappa
- iglob = ibool(i,j,k,ispec)
- potential_dot_dot_acoustic(iglob) = potential_dot_dot_acoustic(iglob) &
- - sourcearrays(isource,1,i,j,k) * stf_used / kappastore(i,j,k,ispec)
- enddo
+ ! beware, for acoustic medium, source is: pressure divided by Kappa of the fluid
+ ! the sign is negative because pressure p = - Chi_dot_dot therefore we need
+ ! to add minus the source to Chi_dot_dot to get plus the source in pressure
+
+ ! add source array
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+ ! adds source contribution
+ ! note: acoustic source for pressure gets divided by kappa
+ iglob = ibool(i,j,k,ispec)
+ potential_dot_dot_acoustic(iglob) = potential_dot_dot_acoustic(iglob) &
+ - sourcearrays(isource,1,i,j,k) * stf_used / kappastore(i,j,k,ispec)
+ enddo
+ enddo
enddo
- enddo
- endif ! USE_FORCE_POINT_SOURCE
+ endif ! USE_FORCE_POINT_SOURCE
- stf_used_total = stf_used_total + stf_used
+ stf_used_total = stf_used_total + stf_used
- endif ! ispec_is_elastic
- endif ! ispec_is_inner
- endif ! myrank
+ endif ! ispec_is_elastic
+ endif ! ispec_is_inner
+ endif ! myrank
- enddo ! NSOURCES
+ enddo ! NSOURCES
+ endif ! GPU_MODE
endif
! NOTE: adjoint sources and backward wavefield timing:
@@ -226,116 +255,131 @@
! adjoint simulations
if (SIMULATION_TYPE == 2 .or. SIMULATION_TYPE == 3) then
- ! read in adjoint sources block by block (for memory consideration)
- ! e.g., in exploration experiments, both the number of receivers (nrec) and
- ! the number of time steps (NSTEP) are huge,
- ! which may cause problems since we have a large array:
- ! adj_sourcearrays(nadj_rec_local,NSTEP,NDIM,NGLLX,NGLLY,NGLLZ)
+ ! adds adjoint source in this partitions
+ if( nadj_rec_local > 0 ) then
- ! figure out if we need to read in a chunk of the adjoint source at this timestep
- it_sub_adj = ceiling( dble(it)/dble(NTSTEP_BETWEEN_READ_ADJSRC) ) !chunk_number
- ibool_read_adj_arrays = (((mod(it-1,NTSTEP_BETWEEN_READ_ADJSRC) == 0)) .and. (nadj_rec_local > 0))
+ ! read in adjoint sources block by block (for memory consideration)
+ ! e.g., in exploration experiments, both the number of receivers (nrec) and
+ ! the number of time steps (NSTEP) are huge,
+ ! which may cause problems since we have a large array:
+ ! adj_sourcearrays(nadj_rec_local,NSTEP,NDIM,NGLLX,NGLLY,NGLLZ)
- ! needs to read in a new chunk/block of the adjoint source
- ! note that for each partition, we divide it into two parts --- boundaries and interior --- indicated by 'phase_is_inner'
- ! we first do calculations for the boudaries, and then start communication
- ! with other partitions while we calculate for the inner part
- ! this must be done carefully, otherwise the adjoint sources may be added twice
- if (ibool_read_adj_arrays .and. (.not. phase_is_inner)) then
+ ! figure out if we need to read in a chunk of the adjoint source at this timestep
+ it_sub_adj = ceiling( dble(it)/dble(NTSTEP_BETWEEN_READ_ADJSRC) ) !chunk_number
+ ibool_read_adj_arrays = (((mod(it-1,NTSTEP_BETWEEN_READ_ADJSRC) == 0)) .and. (nadj_rec_local > 0))
- ! allocates temporary source array
- allocate(adj_sourcearray(NTSTEP_BETWEEN_READ_ADJSRC,NDIM,NGLLX,NGLLY,NGLLZ),stat=ier)
- if( ier /= 0 ) stop 'error allocating array adj_sourcearray'
+ ! needs to read in a new chunk/block of the adjoint source
+ ! note that for each partition, we divide it into two parts --- boundaries and interior --- indicated by 'phase_is_inner'
+ ! we first do calculations for the boudaries, and then start communication
+ ! with other partitions while we calculate for the inner part
+ ! this must be done carefully, otherwise the adjoint sources may be added twice
+ if (ibool_read_adj_arrays .and. (.not. phase_is_inner)) then
- if (.not. SU_FORMAT) then
- !!! read ascii adjoint sources
- irec_local = 0
- do irec = 1, nrec
- ! compute source arrays
- if (myrank == islice_selected_rec(irec)) then
- irec_local = irec_local + 1
+ ! allocates temporary source array
+ allocate(adj_sourcearray(NTSTEP_BETWEEN_READ_ADJSRC,NDIM,NGLLX,NGLLY,NGLLZ),stat=ier)
+ if( ier /= 0 ) stop 'error allocating array adj_sourcearray'
- ! reads in **sta**.**net**.**LH**.adj files
- adj_source_file = trim(station_name(irec))//'.'//trim(network_name(irec))
- call compute_arrays_adjoint_source(myrank,adj_source_file, &
- xi_receiver(irec),eta_receiver(irec),gamma_receiver(irec), &
- adj_sourcearray, xigll,yigll,zigll, &
- it_sub_adj,NSTEP,NTSTEP_BETWEEN_READ_ADJSRC)
+ if (.not. SU_FORMAT) then
+ !!! read ascii adjoint sources
+ irec_local = 0
+ do irec = 1, nrec
+ ! compute source arrays
+ if (myrank == islice_selected_rec(irec)) then
+ irec_local = irec_local + 1
+
+ ! reads in **sta**.**net**.**LH**.adj files
+ adj_source_file = trim(station_name(irec))//'.'//trim(network_name(irec))
+ call compute_arrays_adjoint_source(myrank,adj_source_file, &
+ xi_receiver(irec),eta_receiver(irec),gamma_receiver(irec), &
+ adj_sourcearray, xigll,yigll,zigll, &
+ it_sub_adj,NSTEP,NTSTEP_BETWEEN_READ_ADJSRC)
+ do itime = 1,NTSTEP_BETWEEN_READ_ADJSRC
+ adj_sourcearrays(irec_local,itime,:,:,:,:) = adj_sourcearray(itime,:,:,:,:)
+ enddo
+
+ endif
+ enddo
+ else
+ !!! read SU adjoint sources
+ ! range of the block we need to read
+ it_start = NSTEP - it_sub_adj*NTSTEP_BETWEEN_READ_ADJSRC + 1
+ it_end = it_start + NTSTEP_BETWEEN_READ_ADJSRC - 1
+ write(procname,"(i4)") myrank
+ open(unit=IIN_SU1, file=trim(adjustl(OUTPUT_FILES_PATH))//'../SEM/'//trim(adjustl(procname))//'_dx_SU.adj', &
+ access='direct',recl=240+4*(NSTEP))
+ do irec_local = 1,nrec_local
+ irec = number_receiver_global(irec_local)
+ read(IIN_SU1,rec=irec_local) r4head, adj_temp
+ adj_src(:,1)=adj_temp(it_start:it_end)
+ adj_src(:,2)=0.0 !TRIVIAL
+ adj_src(:,3)=0.0 !TRIVIAL
+ ! lagrange interpolators for receiver location
+ call lagrange_any(xi_receiver(irec),NGLLX,xigll,hxir,hpxir)
+ call lagrange_any(eta_receiver(irec),NGLLY,yigll,hetar,hpetar)
+ call lagrange_any(gamma_receiver(irec),NGLLZ,zigll,hgammar,hpgammar)
+ ! interpolates adjoint source onto GLL points within this element
+ do k = 1, NGLLZ
+ do j = 1, NGLLY
+ do i = 1, NGLLX
+ adj_sourcearray(:,:,i,j,k) = hxir(i) * hetar(j) * hgammar(k) * adj_src(:,:)
+ enddo
+ enddo
+ enddo
do itime = 1,NTSTEP_BETWEEN_READ_ADJSRC
adj_sourcearrays(irec_local,itime,:,:,:,:) = adj_sourcearray(itime,:,:,:,:)
enddo
-
- endif
- enddo
- else
- !!! read SU adjoint sources
- ! range of the block we need to read
- it_start = NSTEP - it_sub_adj*NTSTEP_BETWEEN_READ_ADJSRC + 1
- it_end = it_start + NTSTEP_BETWEEN_READ_ADJSRC - 1
- write(procname,"(i4)") myrank
- open(unit=IIN_SU1, file=trim(adjustl(OUTPUT_FILES_PATH))//'../SEM/'//trim(adjustl(procname))//'_dx_SU.adj', &
- access='direct',recl=240+4*(NSTEP))
- do irec_local = 1,nrec_local
- irec = number_receiver_global(irec_local)
- read(IIN_SU1,rec=irec_local) r4head, adj_temp
- adj_src(:,1)=adj_temp(it_start:it_end)
- adj_src(:,2)=0.0 !TRIVIAL
- adj_src(:,3)=0.0 !TRIVIAL
- ! lagrange interpolators for receiver location
- call lagrange_any(xi_receiver(irec),NGLLX,xigll,hxir,hpxir)
- call lagrange_any(eta_receiver(irec),NGLLY,yigll,hetar,hpetar)
- call lagrange_any(gamma_receiver(irec),NGLLZ,zigll,hgammar,hpgammar)
- ! interpolates adjoint source onto GLL points within this element
- do k = 1, NGLLZ
- do j = 1, NGLLY
- do i = 1, NGLLX
- adj_sourcearray(:,:,i,j,k) = hxir(i) * hetar(j) * hgammar(k) * adj_src(:,:)
- enddo
- enddo
enddo
- do itime = 1,NTSTEP_BETWEEN_READ_ADJSRC
- adj_sourcearrays(irec_local,itime,:,:,:,:) = adj_sourcearray(itime,:,:,:,:)
- enddo
- enddo
- close(IIN_SU1)
- endif !if (.not. SU_FORMAT)
-
- deallocate(adj_sourcearray)
+ close(IIN_SU1)
+ endif !if (.not. SU_FORMAT)
+
+ deallocate(adj_sourcearray)
- endif ! if(ibool_read_adj_arrays)
+ endif ! if(ibool_read_adj_arrays)
- if( it < NSTEP ) then
- ! receivers act as sources
- irec_local = 0
- do irec = 1,nrec
- ! add the source (only if this proc carries the source)
- if (myrank == islice_selected_rec(irec)) then
- irec_local = irec_local + 1
+ if( it < NSTEP ) then
+ ! receivers act as sources
+ if( GPU_MODE) then
+ call add_sources_acoustic_sim_type_2_or_3_cuda(Mesh_pointer, adj_sourcearrays, &
+ size(adj_sourcearrays), phase_is_inner, myrank, nrec, &
+ NTSTEP_BETWEEN_READ_ADJSRC - mod(it-1,NTSTEP_BETWEEN_READ_ADJSRC),&
+ islice_selected_rec, nadj_rec_local, NTSTEP_BETWEEN_READ_ADJSRC)
+ else
+ irec_local = 0
+ do irec = 1,nrec
+ ! add the source (only if this proc carries the source)
+ if (myrank == islice_selected_rec(irec)) then
+ irec_local = irec_local + 1
- ! adds source array
- ispec = ispec_selected_rec(irec)
-
- ! checks if element is in phase_is_inner run
- if (ispec_is_inner(ispec_selected_rec(irec)) .eqv. phase_is_inner) then
-
- do k = 1,NGLLZ
- do j = 1,NGLLY
- do i = 1,NGLLX
- iglob = ibool(i,j,k,ispec)
-
- potential_dot_dot_acoustic(iglob) = potential_dot_dot_acoustic(iglob) &
- + adj_sourcearrays(irec_local, &
- NTSTEP_BETWEEN_READ_ADJSRC - mod(it-1,NTSTEP_BETWEEN_READ_ADJSRC), &
- 1,i,j,k)
- enddo
- enddo
- enddo
-
- endif ! phase_is_inner
-
- endif
- enddo ! nrec
- endif ! it
+ ! adds source array
+ ispec = ispec_selected_rec(irec)
+ if( ispec_is_acoustic(ispec) ) then
+
+ ! checks if element is in phase_is_inner run
+ if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
+ do k = 1,NGLLZ
+ do j = 1,NGLLY
+ do i = 1,NGLLX
+ iglob = ibool(i,j,k,ispec)
+ ! beware, for acoustic medium, a pressure source would be taking the negative
+ ! and divide by Kappa of the fluid;
+ ! this would have to be done when constructing the adjoint source.
+ !
+ ! note: we take the first component of the adj_sourcearrays
+ ! the idea is to have e.g. a pressure source, where all 3 components would be the same
+ potential_dot_dot_acoustic(iglob) = potential_dot_dot_acoustic(iglob) &
+ + adj_sourcearrays(irec_local, &
+ NTSTEP_BETWEEN_READ_ADJSRC - mod(it-1,NTSTEP_BETWEEN_READ_ADJSRC), &
+ 1,i,j,k)
+ enddo
+ enddo
+ enddo
+ endif ! phase_is_inner
+ endif
+ endif
+ enddo ! nrec
+ endif ! GPU_MODE
+ endif ! it
+ endif ! nadj_rec_local > 0
endif
! note: b_potential() is read in after Newark time scheme, thus
@@ -344,90 +388,116 @@
! adjoint simulations
if (SIMULATION_TYPE == 3) then
- ! adds acoustic sources
- do isource = 1,NSOURCES
- ! add the source (only if this proc carries the source)
- if(myrank == islice_selected_source(isource)) then
+ ! on GPU
+ if(GPU_MODE) then
+ if( NSOURCES > 0 ) then
+ do isource = 1,NSOURCES
+ if(USE_FORCE_POINT_SOURCE) then
+ ! precomputes source time function factors
+ stf_pre_compute(isource) = FACTOR_FORCE_SOURCE * comp_source_time_function_rickr( &
+ dble(NSTEP-it)*DT-t0-tshift_cmt(isource),hdur(isource))
+ else
+ stf_pre_compute(isource) = comp_source_time_function_gauss( &
+ dble(NSTEP-it)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+ endif
+ enddo
+ stf_used_total = stf_used_total + sum(stf_pre_compute(:))
+
+ ! only implements SIMTYPE=3
+ call compute_add_sources_acoustic_sim3_cuda(Mesh_pointer, phase_is_inner, &
+ NSOURCES, SIMULATION_TYPE, &
+ USE_FORCE_POINT_SOURCE, stf_pre_compute, myrank)
+ endif
- ispec = ispec_selected_source(isource)
+ else ! .NOT. GPU_MODE
+
+ ! adds acoustic sources
+ do isource = 1,NSOURCES
- if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
+ ! add the source (only if this proc carries the source)
+ if(myrank == islice_selected_source(isource)) then
- if( ispec_is_acoustic(ispec) ) then
+ ispec = ispec_selected_source(isource)
- if(USE_FORCE_POINT_SOURCE) then
+ if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
- ! note: for use_force_point_source xi/eta/gamma are in the range [1,NGLL*]
- iglob = ibool(nint(xi_source(isource)), &
- nint(eta_source(isource)), &
- nint(gamma_source(isource)), &
- ispec)
+ if( ispec_is_acoustic(ispec) ) then
- f0 = hdur(isource) !! using hdur as a FREQUENCY just to avoid changing CMTSOLUTION file format
+ if(USE_FORCE_POINT_SOURCE) then
- !if (it == 1 .and. myrank == 0) then
- ! write(IMAIN,*) 'using a source of dominant frequency ',f0
- ! write(IMAIN,*) 'lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
- ! write(IMAIN,*) 'lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
- !endif
+ ! note: for use_force_point_source xi/eta/gamma are in the range [1,NGLL*]
+ iglob = ibool(nint(xi_source(isource)), &
+ nint(eta_source(isource)), &
+ nint(gamma_source(isource)), &
+ ispec)
- ! gaussian source time function
- !stf_used = comp_source_time_function(dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+ f0 = hdur(isource) !! using hdur as a FREQUENCY just to avoid changing CMTSOLUTION file format
- ! we use nu_source(:,3) here because we want a source normal to the surface.
- ! This is the expression of a Ricker; should be changed according maybe to the Par_file.
- stf_used = FACTOR_FORCE_SOURCE * comp_source_time_function_rickr(dble(NSTEP-it)*DT-t0-tshift_cmt(isource),f0)
+ !if (it == 1 .and. myrank == 0) then
+ ! write(IMAIN,*) 'using a source of dominant frequency ',f0
+ ! write(IMAIN,*) 'lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
+ ! write(IMAIN,*) 'lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
+ !endif
- ! beware, for acoustic medium, source is: pressure divided by Kappa of the fluid
- ! the sign is negative because pressure p = - Chi_dot_dot therefore we need
- ! to add minus the source to Chi_dot_dot to get plus the source in pressure:
+ ! gaussian source time function
+ !stf_used = comp_source_time_function(dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
- ! acoustic source for pressure gets divided by kappa
- ! source contribution
- b_potential_dot_dot_acoustic(iglob) = b_potential_dot_dot_acoustic(iglob) &
- - stf_used / kappastore(nint(xi_source(isource)), &
- nint(eta_source(isource)), &
- nint(gamma_source(isource)),ispec)
+ ! we use nu_source(:,3) here because we want a source normal to the surface.
+ ! This is the expression of a Ricker; should be changed according maybe to the Par_file.
+ stf_used = FACTOR_FORCE_SOURCE * comp_source_time_function_rickr( &
+ dble(NSTEP-it)*DT-t0-tshift_cmt(isource),f0)
- else
+ ! beware, for acoustic medium, source is: pressure divided by Kappa of the fluid
+ ! the sign is negative because pressure p = - Chi_dot_dot therefore we need
+ ! to add minus the source to Chi_dot_dot to get plus the source in pressure:
- ! gaussian source time
- stf = comp_source_time_function_gauss(dble(NSTEP-it)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+ ! acoustic source for pressure gets divided by kappa
+ ! source contribution
+ b_potential_dot_dot_acoustic(iglob) = b_potential_dot_dot_acoustic(iglob) &
+ - stf_used / kappastore(nint(xi_source(isource)), &
+ nint(eta_source(isource)), &
+ nint(gamma_source(isource)),ispec)
- ! distinguishes between single and double precision for reals
- if(CUSTOM_REAL == SIZE_REAL) then
- stf_used = sngl(stf)
else
- stf_used = stf
- endif
- ! beware, for acoustic medium, source is: pressure divided by Kappa of the fluid
- ! the sign is negative because pressure p = - Chi_dot_dot therefore we need
- ! to add minus the source to Chi_dot_dot to get plus the source in pressure
+ ! gaussian source time
+ stf = comp_source_time_function_gauss(dble(NSTEP-it)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
- ! add source array
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- ! adds source contribution
- ! note: acoustic source for pressure gets divided by kappa
- iglob = ibool(i,j,k,ispec)
- b_potential_dot_dot_acoustic(iglob) = b_potential_dot_dot_acoustic(iglob) &
- - sourcearrays(isource,1,i,j,k) * stf_used / kappastore(i,j,k,ispec)
- enddo
+ ! distinguishes between single and double precision for reals
+ if(CUSTOM_REAL == SIZE_REAL) then
+ stf_used = sngl(stf)
+ else
+ stf_used = stf
+ endif
+
+ ! beware, for acoustic medium, source is: pressure divided by Kappa of the fluid
+ ! the sign is negative because pressure p = - Chi_dot_dot therefore we need
+ ! to add minus the source to Chi_dot_dot to get plus the source in pressure
+
+ ! add source array
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+ ! adds source contribution
+ ! note: acoustic source for pressure gets divided by kappa
+ iglob = ibool(i,j,k,ispec)
+ b_potential_dot_dot_acoustic(iglob) = b_potential_dot_dot_acoustic(iglob) &
+ - sourcearrays(isource,1,i,j,k) * stf_used / kappastore(i,j,k,ispec)
+ enddo
+ enddo
enddo
- enddo
- endif ! USE_FORCE_POINT_SOURCE
+ endif ! USE_FORCE_POINT_SOURCE
- stf_used_total = stf_used_total + stf_used
+ stf_used_total = stf_used_total + stf_used
- endif ! ispec_is_elastic
- endif ! ispec_is_inner
- endif ! myrank
+ endif ! ispec_is_elastic
+ endif ! ispec_is_inner
+ endif ! myrank
- enddo ! NSOURCES
+ enddo ! NSOURCES
+ endif ! GPU_MODE
endif
! master prints out source time function to file
Deleted: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_cuda.cu 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_cuda.cu 2011-10-06 03:31:24 UTC (rev 19027)
@@ -1,448 +0,0 @@
-#include <stdio.h>
-#include <cuda.h>
-#include <cublas.h>
-#include <mpi.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <sys/time.h>
-#include <sys/resource.h>
-
-#include "mesh_constants_cuda.h"
-
-// #include "epik_user.h"
-
-void print_CUDA_error_if_any(cudaError_t err, int num);
-
-typedef float real; //type of variables passed into function
-typedef float realw; //type of "working" variables
-
-#define MAXDEBUG 1
-#define ENABLE_VERY_SLOW_ERROR_CHECKING
-
-#if MAXDEBUG == 1
-#define LOG(x) printf("%s\n",x)
-#define PRINT5(var,offset) for(;print_count<5;print_count++) printf("var(%d)=%2.20f\n",print_count,var[offset+print_count]);
-#define PRINT10(var) if(print_count<10) { printf("var=%1.20e\n",var); print_count++; }
-#define PRINT10i(var) if(print_count<10) { printf("var=%d\n",var); print_count++; }
-#else
-#define LOG(x) // printf("%s\n",x);
-#define PRINT5(var,offset) // for(i=0;i<10;i++) printf("var(%d)=%f\n",i,var[offset+i]);
-#endif
-
-#define INDEX2(xsize,x,y) x + (y)*xsize
-#define INDEX3(xsize,ysize,x,y,z) x + (y)*xsize + (z)*xsize*ysize
-#define INDEX4(xsize,ysize,zsize,x,y,z,i) x + (y)*xsize + (z)*xsize*ysize + (i)*xsize*ysize*zsize
-#define INDEX5(xsize,ysize,zsize,isize,x,y,z,i,j) x + (y)*xsize + (z)*xsize*ysize + (i)*xsize*ysize*zsize + (j)*xsize*ysize*zsize*isize
-
-#define INDEX6(xsize,ysize,zsize,isize,jsize,x,y,z,i,j,k) x + xsize*(y + ysize*(z + zsize*(i + isize*(j + jsize*k))))
-
-#define NDIM 3
-#define NGLLX 5
-
-
-// crashes if the CMTSOLUTION does not match the mesh properly
-__global__ void compute_add_sources_kernel(float* accel, int* ibool, int* ispec_is_inner, int phase_is_inner, float* sourcearrays, double* stf_pre_compute,int myrank, int* islice_selected_source, int* ispec_selected_source, int* ispec_is_elastic, int NSOURCES,float* d_debug) {
- int i = threadIdx.x;
- int j = threadIdx.y;
- int k = threadIdx.z;
-
- int isource = blockIdx.x + gridDim.x*blockIdx.y; // bx
- int ispec;
- int iglob;
- double stf;
-
- if(isource < NSOURCES) { // when NSOURCES > 65535, but mod(nspec_top,2) > 0, we end up with an extra block.
-
- if(myrank == islice_selected_source[isource]) {
-
- ispec = ispec_selected_source[isource]-1;
-
- if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_elastic[ispec] == 1) {
-
- stf = stf_pre_compute[isource];
- iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
- atomicAdd(&accel[iglob*3],
- sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 0, i,j,k)]*stf);
- atomicAdd(&accel[iglob*3+1],
- sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 1, i,j,k)]*stf);
- // if((iglob*3+2 == 304598)) {
- // atomicAdd(&d_debug[0],1.0f);
- // d_debug[1] = accel[iglob*3+2];
- // d_debug[2] = sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 2, i,j,k)];
- // d_debug[3] = stf;
- // }
- // d_debug[4] = 42.0f;
- atomicAdd(&accel[iglob*3+2],
- sourcearrays[INDEX5(NSOURCES, 3, 5, 5,isource, 2, i,j,k)]*stf);
-
- }
- }
- }
-
-}
-
-extern "C" void add_sourcearrays_adjoint_cuda_(long* Mesh_pointer,
- int* USE_FORCE_POINT_SOURCE,
- double* h_stf_pre_compute,int* NSOURCES,
- int* phase_is_inner,int* myrank) {
- // EPIK_TRACER("add_sourcearrays_adjoint_cuda");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
- if(*USE_FORCE_POINT_SOURCE) {
- printf("USE FORCE POINT SOURCE not implemented for GPU_MODE");
- MPI_Abort(MPI_COMM_WORLD, 1);
- }
- cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,*NSOURCES*sizeof(double),
- cudaMemcpyHostToDevice);
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("noise_read_add_surface_movie_cuda_kernel");
-#endif
-
- int num_blocks_x = *NSOURCES;
- int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
- num_blocks_x = ceil(num_blocks_x/2.0);
- num_blocks_y = num_blocks_y*2;
- }
-
- dim3 grid(num_blocks_x,num_blocks_y);
- dim3 threads(5,5,5);
-
- float* d_debug;
- // float* h_debug = (float*)calloc(128,sizeof(float));
- // cudaMalloc((void**)&d_debug,128*sizeof(float));
- // cudaMemcpy(d_debug,h_debug,128*sizeof(float),cudaMemcpyHostToDevice);
-
- compute_add_sources_kernel<<<grid,threads>>>(mp->d_b_accel,mp->d_ibool, mp->d_ispec_is_inner, *phase_is_inner, mp->d_sourcearrays, mp->d_stf_pre_compute,*myrank, mp->d_islice_selected_source,mp->d_ispec_selected_source,mp->d_ispec_is_elastic, *NSOURCES,d_debug);
-
- // cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
- // for(int i=0;i<10;i++) {
- // printf("debug[%d] = %e \n",i,h_debug[i]);
- // }
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- // sync and check to catch errors from previous async operations
- cudaThreadSynchronize();
- cudaError_t err = cudaGetLastError();
- if (err != cudaSuccess)
- {
- fprintf(stderr,"Error launching/running add_sourcearrays_adjoint_cuda:->\n\tcompute_add_sources_kernel: %s\n", cudaGetErrorString(err));
- exit(1);
- }
-#endif
-
-
-}
-
-extern "C" void compute_add_sources_elastic_cuda_(long* Mesh_pointer_f, int* NSPEC_ABf, int* NGLOB_ABf, int* phase_is_innerf,int* NSOURCESf, int* itf, float* dtf, float* t0f,int* SIMULATION_TYPEf,int* NSTEPf,int* NOISE_TOMOGRAPHYf, int* USE_FORCE_POINT_SOURCEf, double* h_stf_pre_compute, int* myrankf) {
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
- int NSPEC_AB = *NSPEC_ABf;
- int NGLOB_AB = *NGLOB_ABf;
- int phase_is_inner = *phase_is_innerf;
- int it = *itf;
- float dt = *dtf;
- float t0 = *t0f;
- int SIMULATION_TYPE = *SIMULATION_TYPEf;
- int NSTEP = *NSTEPf;
- int NOISE_TOMOGRAPHY = *NOISE_TOMOGRAPHYf;
- int NSOURCES = *NSOURCESf;
- int USE_FORCE_POINT_SOURCE = *USE_FORCE_POINT_SOURCEf;
- int myrank = *myrankf;
-
- float* d_debug;
- int num_blocks_x = NSOURCES;
- int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
- num_blocks_x = ceil(num_blocks_x/2.0);
- num_blocks_y = num_blocks_y*2;
- }
-
- double* d_stf_pre_compute;
- print_CUDA_error_if_any(cudaMemcpy(mp->d_stf_pre_compute,h_stf_pre_compute,NSOURCES*sizeof(double),cudaMemcpyHostToDevice),18);
-
- dim3 grid(num_blocks_x,num_blocks_y);
- dim3 threads(5,5,5);
- // (float* accel, int* ibool, int* ispec_is_inner, int phase_is_inner, float* sourcearrays, double* stf_pre_compute,int myrank, int* islice_selected_source, int* ispec_selected_source, int* ispec_is_elastic, int NSOURCES)
-
-
-
- compute_add_sources_kernel<<<grid,threads>>>(mp->d_accel,mp->d_ibool, mp->d_ispec_is_inner, phase_is_inner, mp->d_sourcearrays, mp->d_stf_pre_compute,myrank, mp->d_islice_selected_source,mp->d_ispec_selected_source,mp->d_ispec_is_elastic, NSOURCES,d_debug);
- #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- // sync and check to catch errors from previous async operations
- cudaThreadSynchronize();
- cudaError_t err = cudaGetLastError();
- if (err != cudaSuccess)
- {
- fprintf(stderr,"Error launching/running compute_add_sources_kernel: %s\n", cudaGetErrorString(err));
- exit(1);
- }
-#endif
-}
-
-__global__ void add_source_master_rec_noise_cuda_kernel(int* ibool, int* ispec_selected_rec, int irec_master_noise, real* accel, real* noise_sourcearray, int it) {
- int tx = threadIdx.x;
- int iglob = ibool[tx + 125*(ispec_selected_rec[irec_master_noise-1]-1)]-1;
-
- // not sure if we need atomic operations but just in case...
- // accel[3*iglob] += noise_sourcearray[3*tx + 3*125*it];
- // accel[1+3*iglob] += noise_sourcearray[1+3*tx + 3*125*it];
- // accel[2+3*iglob] += noise_sourcearray[2+3*tx + 3*125*it];
-
- atomicAdd(&accel[iglob*3],noise_sourcearray[3*tx + 3*125*it]);
- atomicAdd(&accel[iglob*3+1],noise_sourcearray[1+3*tx + 3*125*it]);
- atomicAdd(&accel[iglob*3+2],noise_sourcearray[2+3*tx + 3*125*it]);
-
-}
-
-extern "C" void add_source_master_rec_noise_cuda_(long* Mesh_pointer_f, int* myrank_f, int* it_f, int* irec_master_noise_f, int* islice_selected_rec) {
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- int it = *it_f-1; // -1 for Fortran -> C indexing differences
- int irec_master_noise = *irec_master_noise_f;
- int myrank = *myrank_f;
- dim3 grid(1,1,1);
- dim3 threads(125,1,1);
- if(myrank == islice_selected_rec[irec_master_noise-1]) {
- add_source_master_rec_noise_cuda_kernel<<<grid,threads>>>(mp->d_ibool, mp->d_ispec_selected_rec,
- irec_master_noise, mp->d_accel,
- mp->d_noise_sourcearray, it);
- #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- // sync and check to catch errors from previous async operations
- cudaThreadSynchronize();
- cudaError_t err = cudaGetLastError();
- if (err != cudaSuccess)
- {
- fprintf(stderr,"Error launching/running add_source_master_rec_noise_cuda_kernel: %s\n", cudaGetErrorString(err));
- exit(1);
- }
-#endif
- }
-}
-
-
-
-
-
-__global__ void add_sources_SIM_TYPE_2_OR_3_kernel(float* accel, int nrec,
- float* adj_sourcearrays,
- int* ibool,
- int* ispec_is_inner,
- int* ispec_selected_rec,
- int phase_is_inner,
- int* islice_selected_rec,
- int* pre_computed_irec,
- int nadj_rec_local,
- int NTSTEP_BETWEEN_ADJSRC,
- int myrank,
- int* debugi,
- float* debugf) {
- int irec_local = blockIdx.x + gridDim.x*blockIdx.y;
- if(irec_local<nadj_rec_local) { // when nrec > 65535, but mod(nspec_top,2) > 0, we end up with an extra block.
-
- int irec = pre_computed_irec[irec_local];
-
- int ispec_selected = ispec_selected_rec[irec]-1;
- if(ispec_is_inner[ispec_selected] == phase_is_inner) {
- int i = threadIdx.x;
- int j = threadIdx.y;
- int k = threadIdx.z;
- int iglob = ibool[i+5*(j+5*(k+5*ispec_selected))]-1;
-
- // atomic operations are absolutely necessary for correctness!
- atomicAdd(&(accel[0+3*iglob]),adj_sourcearrays[INDEX5(5,5,5,3,
- i,j,k,
- 0,
- irec_local)]);
-
- atomicAdd(&accel[1+3*iglob], adj_sourcearrays[INDEX5(5,5,5,3,
- i,j,k,
- 1,
- irec_local)]);
-
- atomicAdd(&accel[2+3*iglob],adj_sourcearrays[INDEX5(5,5,5,3,
- i,j,k,
- 2,
- irec_local)]);
- }
-
- }
-
-}
-
-extern "C" void add_sources_sim_type_2_or_3_(long* Mesh_pointer, float* h_adj_sourcearrays,
- int* size_adj_sourcearrays, int* ispec_is_inner,
- int* phase_is_inner, int* ispec_selected_rec,
- int* ibool,
- int* myrank, int* nrec, int* time_index,
- int* h_islice_selected_rec,int* nadj_rec_local,
- int* NTSTEP_BETWEEN_READ_ADJSRC) {
-
- if(*nadj_rec_local > 0) {
-
- Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
- int rank;
- MPI_Comm_rank(MPI_COMM_WORLD,&rank);
-
- // make sure grid dimension is less than 65535 in x dimension
- int num_blocks_x = *nadj_rec_local;
- int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
- num_blocks_x = ceil(num_blocks_x/2.0);
- num_blocks_y = num_blocks_y*2;
- }
- dim3 grid(num_blocks_x,num_blocks_y,1);
- dim3 threads(5,5,5);
-
- float* d_adj_sourcearrays;
- print_CUDA_error_if_any(cudaMalloc((void**)&d_adj_sourcearrays,
- (*nadj_rec_local)*3*125*sizeof(float)),1);
- float* h_adj_sourcearrays_slice = (float*)malloc((*nadj_rec_local)*3*125*sizeof(float));
-
- int* h_pre_computed_irec = new int[*nadj_rec_local];
- int* d_pre_computed_irec;
- cudaMalloc((void**)&d_pre_computed_irec,(*nadj_rec_local)*sizeof(int));
-
- // build slice of adj_sourcearrays because full array is *very* large.
- int irec_local = 0;
- for(int irec = 0;irec<*nrec;irec++) {
- if(*myrank == h_islice_selected_rec[irec]) {
- irec_local++;
- h_pre_computed_irec[irec_local-1] = irec;
- if(ispec_is_inner[ispec_selected_rec[irec]-1] == *phase_is_inner) {
- for(int k=0;k<5;k++) {
- for(int j=0;j<5;j++) {
- for(int i=0;i<5;i++) {
-
- h_adj_sourcearrays_slice[INDEX5(5,5,5,3,
- i,j,k,0,
- irec_local-1)]
- = h_adj_sourcearrays[INDEX6(*nadj_rec_local,
- *NTSTEP_BETWEEN_READ_ADJSRC,
- 3,5,5,
- irec_local-1,
- *time_index-1,
- 0,i,j,k)];
-
- h_adj_sourcearrays_slice[INDEX5(5,5,5,3,
- i,j,k,1,
- irec_local-1)]
- = h_adj_sourcearrays[INDEX6(*nadj_rec_local,
- *NTSTEP_BETWEEN_READ_ADJSRC,
- 3,5,5,
- irec_local-1,
- *time_index-1,
- 1,i,j,k)];
-
- h_adj_sourcearrays_slice[INDEX5(5,5,5,3,
- i,j,k,2,
- irec_local-1)]
- = h_adj_sourcearrays[INDEX6(*nadj_rec_local,
- *NTSTEP_BETWEEN_READ_ADJSRC,
- 3,5,5,
- irec_local-1,
- *time_index-1,
- 2,i,j,k)];
-
-
- }
- }
- }
- }
- }
- }
- // printf("irec_local vs. *nadj_rec_local -> %d vs. %d\n",irec_local,*nadj_rec_local);
- // for(int ispec=0;ispec<(*nadj_rec_local);ispec++) {
- // for(int i=0;i<5;i++)
- // for(int j=0;j<5;j++)
- // for(int k=0;k<5;k++) {
- // h_adj_sourcearrays_slice[INDEX5(5,5,5,3,i,j,k,0,ispec)] =
- // h_adj_sourcearrays[INDEX6(*nadj_rec_local,*NTSTEP_BETWEEN_READ_ADJSRC,3,5,5,
- // ispec,
- // *time_index-1,
- // 0,
- // i,j,k)];
- // h_adj_sourcearrays_slice[INDEX5(5,5,5,3,i,j,k,1,ispec)] =
- // h_adj_sourcearrays[INDEX6(*nadj_rec_local,*NTSTEP_BETWEEN_READ_ADJSRC,3,5,5,
- // ispec,
- // *time_index-1,
- // 1,
- // i,j,k)];
- // h_adj_sourcearrays_slice[INDEX5(5,5,5,3,i,j,k,2,ispec)] =
- // h_adj_sourcearrays[INDEX6(*nadj_rec_local,*NTSTEP_BETWEEN_ADJSRC,3,5,5,
- // ispec,
- // *time_index-1,
- // 2,
- // i,j,k)];
- // }
-
- // }
-
- cudaMemcpy(d_adj_sourcearrays, h_adj_sourcearrays_slice,(*nadj_rec_local)*3*125*sizeof(float),
- cudaMemcpyHostToDevice);
-
-
- // the irec_local variable needs to be precomputed (as
- // h_pre_comp..), because normally it is in the loop updating accel,
- // and due to how it's incremented, it cannot be parallelized
-
- // int irec_local=0;
- // for(int irec=0;irec<*nrec;irec++) {
- // if(*myrank == h_islice_selected_rec[irec]) {
- // h_pre_computed_irec_local_index[irec] = irec_local;
- // irec_local++;
- // if(irec_local==1) {
- // // printf("%d:first useful irec==%d\n",rank,irec);
- // }
- // }
- // else h_pre_computed_irec_local_index[irec] = 0;
- // }
- cudaMemcpy(d_pre_computed_irec,h_pre_computed_irec,
- (*nadj_rec_local)*sizeof(int),cudaMemcpyHostToDevice);
- // pause_for_debugger(1);
- int* d_debugi, *h_debugi;
- float* d_debugf, *h_debugf;
- h_debugi = (int*)calloc(num_blocks_x,sizeof(int));
- cudaMalloc((void**)&d_debugi,num_blocks_x*sizeof(int));
- cudaMemcpy(d_debugi,h_debugi,num_blocks_x*sizeof(int),cudaMemcpyHostToDevice);
- h_debugf = (float*)calloc(num_blocks_x,sizeof(float));
- cudaMalloc((void**)&d_debugf,num_blocks_x*sizeof(float));
- cudaMemcpy(d_debugf,h_debugf,num_blocks_x*sizeof(float),cudaMemcpyHostToDevice);
-
- add_sources_SIM_TYPE_2_OR_3_kernel<<<grid,threads>>>(mp->d_accel, *nrec,
- d_adj_sourcearrays, mp->d_ibool,
- mp->d_ispec_is_inner,
- mp->d_ispec_selected_rec,
- *phase_is_inner,
- mp->d_islice_selected_rec,
- d_pre_computed_irec,
- *nadj_rec_local,
- *NTSTEP_BETWEEN_READ_ADJSRC,
- *myrank,
- d_debugi,d_debugf);
-
- cudaMemcpy(h_debugi,d_debugi,num_blocks_x*sizeof(int),cudaMemcpyDeviceToHost);
- cudaMemcpy(h_debugf,d_debugf,num_blocks_x*sizeof(float),cudaMemcpyDeviceToHost);
-
- // printf("%d: pre_com0:%d\n",rank,h_pre_computed_irec_local_index[0]);
- // printf("%d: pre_com1:%d\n",rank,h_pre_computed_irec_local_index[1]);
- // printf("%d: pre_com2:%d\n",rank,h_pre_computed_irec_local_index[2]);
- // for(int i=156;i<(156+30);i++) {
- // if(rank==0) printf("%d:debug[%d] = i/f = %d / %e\n",rank,i,h_debugi[i],h_debugf[i]);
- // }
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- // cudaThreadSynchronize();
- // MPI_Barrier(MPI_COMM_WORLD);
- exit_on_cuda_error("add_sources_SIM_TYPE_2_OR_3_kernel");
-
- // printf("Proc %d exiting with successful kernel\n",rank);
- // exit(1);
-#endif
- delete h_pre_computed_irec;
- cudaFree(d_adj_sourcearrays);
- cudaFree(d_pre_computed_irec);
- }
-}
Copied: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_boundary_kernel.f90 (from rev 19012, seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/compute_boundary_kernel.f90)
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_boundary_kernel.f90 (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_boundary_kernel.f90 2011-10-06 03:31:24 UTC (rev 19027)
@@ -0,0 +1,233 @@
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+ subroutine compute_boundary_kernel()
+
+! isotropic topography kernel computation
+! compare with Tromp et al. (2005), eq. (25), or see Liu & Tromp (2008), eq. (65)
+
+ use specfem_par
+ use specfem_par_elastic
+ use specfem_par_acoustic
+
+ implicit none
+ ! local parameters
+ real(kind=CUSTOM_REAL):: kernel_moho_top,kernel_moho_bot
+ integer :: i,j,k
+ integer :: ispec2D,igll,jgll
+ integer :: ispec_top,ispec_bot,iglob_top,iglob_bot
+ logical :: is_done
+
+ ! loops over top/bottom elements of moho surface
+ do ispec2D = 1, NSPEC2D_MOHO
+ ispec_top = ibelm_moho_top(ispec2D)
+ ispec_bot = ibelm_moho_bot(ispec2D)
+
+ ! elements on both sides available
+ if( ispec_top > 0 .and. ispec_bot > 0 ) then
+ ! loops over surface
+ do igll=1,NGLLSQUARE
+ i = ijk_moho_top(1,igll,ispec2D)
+ j = ijk_moho_top(2,igll,ispec2D)
+ k = ijk_moho_top(3,igll,ispec2D)
+ iglob_top = ibool(i,j,k,ispec_top)
+
+ ! computes contribution from top element
+ call compute_boundary_kernel_elem( kernel_moho_top, &
+ mustore(i,j,k,ispec_top), &
+ kappastore(i,j,k,ispec_top),rho_vs(i,j,k,ispec_top), &
+ accel(:,iglob_top),b_displ(:,iglob_top), &
+ dsdx_top(:,:,i,j,k,ispec2D),b_dsdx_top(:,:,i,j,k,ispec2D), &
+ normal_moho_top(:,igll,ispec2D) )
+
+ ! finds corresponding global node in bottom element
+ is_done = .false.
+ do jgll = 1,NGLLSQUARE
+ i = ijk_moho_bot(1,jgll,ispec2D)
+ j = ijk_moho_bot(2,jgll,ispec2D)
+ k = ijk_moho_bot(3,jgll,ispec2D)
+ iglob_bot = ibool(i,j,k,ispec_bot)
+
+ if( iglob_bot /= iglob_top ) cycle
+ ! iglob_top == iglob_bot!
+
+ ! computes contribution from bottom element
+ call compute_boundary_kernel_elem( kernel_moho_bot, &
+ mustore(i,j,k,ispec_bot), &
+ kappastore(i,j,k,ispec_bot),rho_vs(i,j,k,ispec_bot), &
+ accel(:,iglob_bot),b_displ(:,iglob_bot), &
+ dsdx_bot(:,:,i,j,k,ispec2D),b_dsdx_bot(:,:,i,j,k,ispec2D), &
+ normal_moho_bot(:,jgll,ispec2D) )
+
+ ! note: kernel point position: indices given by ijk_moho_top(:,igll,ispec2D)
+ moho_kl(igll,ispec2D) = moho_kl(igll,ispec2D) &
+ + (kernel_moho_top - kernel_moho_bot) * deltat
+
+ ! kernel done for this point
+ is_done = .true.
+ enddo
+
+ ! checks
+ if( .not. is_done ) then
+ print*,'error : moho kernel not computed'
+ print*,'ispec:',ispec_top,ispec_bot,iglob_top,i,j,k
+ call exit_mpi(myrank,'error moho kernel computation')
+ endif
+
+ enddo
+
+ ! only one element available
+ ! e.g. free-surface: see Tromp et al. (2005), eq. (28)
+ else if( ispec_bot > 0 .or. ispec_top > 0 ) then
+
+ ! loops over surface
+ do igll=1,NGLLSQUARE
+
+ if( ispec_top > 0 ) then
+ i = ijk_moho_top(1,igll,ispec2D)
+ j = ijk_moho_top(2,igll,ispec2D)
+ k = ijk_moho_top(3,igll,ispec2D)
+ iglob_top = ibool(i,j,k,ispec_top)
+
+ ! computes contribution from top element
+ call compute_boundary_kernel_elem( kernel_moho_top, &
+ mustore(i,j,k,ispec_top), &
+ kappastore(i,j,k,ispec_top),rho_vs(i,j,k,ispec_top), &
+ accel(:,iglob_top),b_displ(:,iglob_top), &
+ dsdx_top(:,:,i,j,k,ispec2D),b_dsdx_top(:,:,i,j,k,ispec2D), &
+ normal_moho_top(:,igll,ispec2D) )
+
+ ! note: kernel point position igll: indices given by ijk_moho_top(:,igll,ispec2D)
+ moho_kl(igll,ispec2D) = moho_kl(igll,ispec2D) + kernel_moho_top * deltat
+
+ else
+ i = ijk_moho_bot(1,igll,ispec2D)
+ j = ijk_moho_bot(2,igll,ispec2D)
+ k = ijk_moho_bot(3,igll,ispec2D)
+ iglob_bot = ibool(i,j,k,ispec_bot)
+
+ ! computes contribution from bottom element
+ call compute_boundary_kernel_elem( kernel_moho_bot, &
+ mustore(i,j,k,ispec_bot), &
+ kappastore(i,j,k,ispec_bot),rho_vs(i,j,k,ispec_bot), &
+ accel(:,iglob_bot),b_displ(:,iglob_bot), &
+ dsdx_bot(:,:,i,j,k,ispec2D),b_dsdx_bot(:,:,i,j,k,ispec2D), &
+ normal_moho_bot(:,igll,ispec2D) )
+
+ ! note: kernel point position igll: indices given by ijk_moho_bot(:,igll,ispec2D)
+ moho_kl(igll,ispec2D) = moho_kl(igll,ispec2D) - kernel_moho_bot * deltat
+
+ endif
+ enddo
+ endif
+ enddo ! ispec2D
+
+
+end subroutine compute_boundary_kernel
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+subroutine compute_boundary_kernel_elem(kernel, mul, kappal, rho_vsl, &
+ accel, b_displ, ds, b_ds, norm)
+
+! compute the boundary kernel contribution from one side of the boundary
+! see e.g.: Tromp et al. (2005), eq. (25), or Liu & Tromp (2008), eq. (65)
+
+ implicit none
+ include 'constants.h'
+
+ real(kind=CUSTOM_REAL) kernel, mul, kappal, rho_vsl
+ real(kind=CUSTOM_REAL) :: accel(NDIM), b_displ(NDIM), ds(NDIM,NDIM), b_ds(NDIM,NDIM), norm(NDIM)
+
+ real(kind=CUSTOM_REAL) :: eps3, eps(NDIM,NDIM), epsdev(NDIM,NDIM), normal(NDIM,1)
+ real(kind=CUSTOM_REAL) :: b_eps3, b_eps(NDIM,NDIM), b_epsdev(NDIM,NDIM)
+ real(kind=CUSTOM_REAL) :: temp1(NDIM,NDIM), rhol, kl(1,1), one_matrix(1,1)
+
+
+ normal(:,1) = norm
+ one_matrix(1,1) = ONE
+
+ ! adjoint strain (epsilon) trace
+ eps3 = ds(1,1) + ds(2,2) + ds(3,3)
+
+ ! adjoint strain tensor
+ eps(1,1) = ds(1,1)
+ eps(2,2) = ds(2,2)
+ eps(3,3) = ds(3,3)
+ eps(1,2) = (ds(1,2) + ds(2,1))/2
+ eps(1,3) = (ds(1,3) + ds(3,1))/2
+ eps(2,3) = (ds(2,3) + ds(3,2))/2
+ eps(2,1) = eps(1,2)
+ eps(3,1) = eps(1,3)
+ eps(3,2) = eps(2,3)
+
+ ! adjoint deviatoric strain component
+ epsdev = eps
+ epsdev(1,1) = eps(1,1) - eps3 / 3
+ epsdev(2,2) = eps(2,2) - eps3 / 3
+ epsdev(3,3) = eps(3,3) - eps3 / 3
+
+
+ ! backward/reconstructed-forward strain (epsilon) trace
+ b_eps3 = b_ds(1,1) + b_ds(2,2) + b_ds(3,3)
+
+ ! backward/reconstructed-forward strain tensor
+ b_eps(1,1) = b_ds(1,1)
+ b_eps(2,2) = b_ds(2,2)
+ b_eps(3,3) = b_ds(3,3)
+ b_eps(1,2) = (b_ds(1,2) + b_ds(2,1))/2
+ b_eps(1,3) = (b_ds(1,3) + b_ds(3,1))/2
+ b_eps(2,3) = (b_ds(2,3) + b_ds(3,2))/2
+ b_eps(2,1) = b_eps(1,2)
+ b_eps(3,1) = b_eps(1,3)
+ b_eps(3,2) = b_eps(2,3)
+
+ ! backward/reconstructed-forward deviatoric strain
+ b_epsdev = b_eps
+ b_epsdev(1,1) = b_eps(1,1) - b_eps3 / 3
+ b_epsdev(2,2) = b_eps(2,2) - b_eps3 / 3
+ b_epsdev(3,3) = b_eps(3,3) - b_eps3 / 3
+
+ ! matrix multiplication
+ temp1 = matmul(epsdev,b_epsdev)
+
+ ! density value
+ rhol = rho_vsl ** 2 / mul
+
+ ! isotropic kernel value
+ ! see e.g.: Tromp et al. (2005), eq. (25), or Liu & Tromp 2008, eq. (65)
+ kl = ( rhol * dot_product(accel(:), b_displ(:)) + kappal * eps3 * b_eps3 &
+ + 2 * mul * (temp1(1,1) + temp1(2,2) + temp1(3,3)) ) * one_matrix &
+ - kappal * matmul(transpose(normal),matmul(eps,normal)) * b_eps3 &
+ - kappal * matmul(transpose(normal),matmul(b_eps,normal)) * eps3 &
+ - 2 * mul * matmul(transpose(normal), matmul(matmul(b_epsdev,ds), normal)) &
+ - 2 * mul * matmul(transpose(normal), matmul(matmul(epsdev,b_ds), normal))
+
+ kernel = kl(1,1)
+
+end subroutine compute_boundary_kernel_elem
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_acoustic.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_acoustic.f90 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_acoustic.f90 2011-10-06 03:31:24 UTC (rev 19027)
@@ -66,20 +66,36 @@
! enforces free surface (zeroes potentials at free surface)
- call acoustic_enforce_free_surface(NSPEC_AB,NGLOB_AB, &
+ if(.NOT. GPU_MODE) then
+ ! on CPU
+ call acoustic_enforce_free_surface(NSPEC_AB,NGLOB_AB, &
potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
ibool,free_surface_ijk,free_surface_ispec, &
num_free_surface_faces,ispec_is_acoustic)
- ! adjoint simulations
- if( SIMULATION_TYPE == 3 ) &
- call acoustic_enforce_free_surface(NSPEC_AB,NGLOB_ADJOINT, &
+ ! adjoint simulations
+ if( SIMULATION_TYPE == 3 ) &
+ call acoustic_enforce_free_surface(NSPEC_AB,NGLOB_ADJOINT, &
b_potential_acoustic,b_potential_dot_acoustic,b_potential_dot_dot_acoustic, &
ibool,free_surface_ijk,free_surface_ispec, &
num_free_surface_faces,ispec_is_acoustic)
+ else
+ ! on GPU
+ call acoustic_enforce_free_surface_cuda(Mesh_pointer,SIMULATION_TYPE,ABSORB_FREE_SURFACE)
+ endif
+ if(PML) then
+ ! enforces free surface on PML elements
- if(PML) call PML_acoustic_enforce_free_srfc(NSPEC_AB,NGLOB_AB, &
+ ! note:
+ ! PML routines are not implemented as CUDA kernels, we just transfer the fields
+ ! from the GPU to the CPU and vice versa
+
+ ! transfers potentials to the CPU
+ if(GPU_MODE) call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+
+ call PML_acoustic_enforce_free_srfc(NSPEC_AB,NGLOB_AB, &
potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
ibool,free_surface_ijk,free_surface_ispec, &
num_free_surface_faces,ispec_is_acoustic, &
@@ -89,6 +105,11 @@
chi1_dot_dot,chi2_t_dot_dot,&
chi3_dot_dot,chi4_dot_dot)
+ ! transfers potentials back to GPU
+ if(GPU_MODE) call transfer_fields_acoustic_to_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+ endif
+
! distinguishes two runs: for points on MPI interfaces, and points within the partitions
do iphase=1,2
@@ -100,7 +121,9 @@
endif
! acoustic pressure term
- call compute_forces_acoustic_pot( iphase, NSPEC_AB,NGLOB_AB, &
+ if(.NOT. GPU_MODE) then
+ ! on CPU
+ call compute_forces_acoustic_pot( iphase, NSPEC_AB,NGLOB_AB, &
potential_acoustic,potential_dot_dot_acoustic, &
xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
hprime_xx,hprime_yy,hprime_zz, &
@@ -110,9 +133,9 @@
num_phase_ispec_acoustic,nspec_inner_acoustic,nspec_outer_acoustic,&
phase_ispec_inner_acoustic )
- ! adjoint simulations
- if( SIMULATION_TYPE == 3 ) &
- call compute_forces_acoustic_pot( iphase, NSPEC_ADJOINT,NGLOB_ADJOINT, &
+ ! adjoint simulations
+ if( SIMULATION_TYPE == 3 ) &
+ call compute_forces_acoustic_pot( iphase, NSPEC_ADJOINT,NGLOB_ADJOINT, &
b_potential_acoustic,b_potential_dot_dot_acoustic, &
xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
hprime_xx,hprime_yy,hprime_zz, &
@@ -121,8 +144,19 @@
rhostore,jacobian,ibool, &
num_phase_ispec_acoustic,nspec_inner_acoustic,nspec_outer_acoustic,&
phase_ispec_inner_acoustic )
+ else
+ ! on GPU
+ ! includes code for SIMULATION_TYPE==3
+ call compute_forces_acoustic_cuda(Mesh_pointer, iphase, nspec_outer_acoustic, nspec_inner_acoustic, &
+ SIMULATION_TYPE)
+ endif
- if(PML) then
+
+ if(PML) then
+ ! transfers potentials to CPU
+ if(GPU_MODE) call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+
call compute_forces_acoustic_PML(NSPEC_AB,NGLOB_AB, &
ibool,ispec_is_inner,phase_is_inner, &
rhostore,ispec_is_acoustic,potential_acoustic, &
@@ -143,6 +177,10 @@
num_PML_ispec,PML_ispec,iglob_is_PML_interface,&
chi1_dot_dot,chi3_dot_dot,chi4_dot_dot)
+ ! transfers potentials back to GPU
+ if(GPU_MODE) call transfer_fields_acoustic_to_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+
endif ! PML
! absorbing boundaries
@@ -158,7 +196,12 @@
num_PML_ispec,PML_ispec,ispec_is_PML_inum,&
chi1_dot,chi2_t,chi2_t_dot,chi3_dot,chi4_dot,&
chi1_dot_dot,chi3_dot_dot,chi4_dot_dot)
+ ! transfers potentials back to GPU
+ if(GPU_MODE) call transfer_fields_acoustic_to_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+
else
+ ! Stacey boundary conditions
call compute_stacey_acoustic(NSPEC_AB,NGLOB_AB, &
potential_dot_dot_acoustic,potential_dot_acoustic, &
ibool,ispec_is_inner,phase_is_inner, &
@@ -166,12 +209,27 @@
num_abs_boundary_faces,rhostore,kappastore,ispec_is_acoustic, &
SIMULATION_TYPE,SAVE_FORWARD,NSTEP,it,NGLOB_ADJOINT, &
b_potential_dot_dot_acoustic,b_reclen_potential, &
- b_absorb_potential,b_num_abs_boundary_faces)
+ b_absorb_potential,b_num_abs_boundary_faces, &
+ GPU_MODE,Mesh_pointer)
endif
endif
! elastic coupling
if(ELASTIC_SIMULATION ) then
+
+ ! transfers potentials to CPU
+ if(GPU_MODE) then
+ call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+ call transfer_fields_from_device(NDIM*NGLOB_AB,displ,veloc,accel, Mesh_pointer)
+ ! backward simulation
+ if( SIMULATION_TYPE == 3 ) then
+ call transfer_fields_acoustic_from_device(NGLOB_AB,b_potential_acoustic, &
+ b_potential_dot_acoustic, b_potential_dot_dot_acoustic, Mesh_pointer)
+ call transfer_fields_from_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel,Mesh_pointer)
+ endif
+ endif
+
call compute_coupling_acoustic_el(NSPEC_AB,NGLOB_AB, &
ibool,displ,potential_dot_dot_acoustic, &
num_coupling_ac_el_faces, &
@@ -188,12 +246,23 @@
coupling_ac_el_normal, &
coupling_ac_el_jacobian2Dw, &
ispec_is_inner,phase_is_inner)
+
+ ! transfers potentials to CPU
+ if(GPU_MODE) then
+ ! only potential_dot_dot_acoustic/b_potential_dot_dot_acoustic is updated above
+ call transfer_fields_acoustic_to_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+ if( SIMULATION_TYPE == 3 ) &
+ call transfer_fields_acoustic_to_device(NGLOB_AB,b_potential_acoustic, &
+ b_potential_dot_acoustic, b_potential_dot_dot_acoustic, Mesh_pointer)
+ endif
endif
! poroelastic coupling
! not implemented yet
- !if(POROELASTIC_SIMULATION ) &
- ! call compute_coupling_acoustic_poro()
+ if(POROELASTIC_SIMULATION ) &
+ ! call compute_coupling_acoustic_poro()
+ call exit_MPI(myrank,'poroelastic coupling with acoustic domain not implemented yet!')
! sources
call compute_add_sources_acoustic(NSPEC_AB,NGLOB_AB,potential_dot_dot_acoustic, &
@@ -205,54 +274,127 @@
SIMULATION_TYPE,NSTEP,NGLOB_ADJOINT, &
nrec,islice_selected_rec,ispec_selected_rec, &
nadj_rec_local,adj_sourcearrays,b_potential_dot_dot_acoustic, &
- NTSTEP_BETWEEN_READ_ADJSRC )
+ NTSTEP_BETWEEN_READ_ADJSRC, &
+ GPU_MODE, Mesh_pointer)
! assemble all the contributions between slices using MPI
if( phase_is_inner .eqv. .false. ) then
! sends potential_dot_dot_acoustic values to corresponding MPI interface neighbors (non-blocking)
- call assemble_MPI_scalar_ext_mesh_s(NPROC,NGLOB_AB,potential_dot_dot_acoustic, &
+ if(.NOT. GPU_MODE) then
+ call assemble_MPI_scalar_ext_mesh_s(NPROC,NGLOB_AB,potential_dot_dot_acoustic, &
buffer_send_scalar_ext_mesh,buffer_recv_scalar_ext_mesh, &
num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
my_neighbours_ext_mesh, &
request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh)
+ else
+ ! on GPU
+ call transfer_boundary_potential_from_device(NGLOB_AB, Mesh_pointer, &
+ potential_dot_dot_acoustic, &
+ buffer_send_scalar_ext_mesh, &
+ num_interfaces_ext_mesh, &
+ max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh, &
+ ibool_interfaces_ext_mesh, &
+ 1) ! <-- 1 == fwd accel
+ call assemble_MPI_scalar_ext_mesh_send_cuda(NPROC, &
+ buffer_send_scalar_ext_mesh,buffer_recv_scalar_ext_mesh, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,&
+ my_neighbours_ext_mesh, &
+ request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh)
+ endif
+
! adjoint simulations
- if( SIMULATION_TYPE == 3 ) &
- call assemble_MPI_scalar_ext_mesh_s(NPROC,NGLOB_ADJOINT,b_potential_dot_dot_acoustic, &
+ if( SIMULATION_TYPE == 3 ) then
+ if(.NOT. GPU_MODE) then
+ call assemble_MPI_scalar_ext_mesh_s(NPROC,NGLOB_ADJOINT,b_potential_dot_dot_acoustic, &
b_buffer_send_scalar_ext_mesh,b_buffer_recv_scalar_ext_mesh, &
num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
my_neighbours_ext_mesh, &
b_request_send_scalar_ext_mesh,b_request_recv_scalar_ext_mesh)
+ else
+ ! on GPU
+ call transfer_boundary_potential_from_device(NGLOB_AB, Mesh_pointer, &
+ b_potential_dot_dot_acoustic, &
+ b_buffer_send_scalar_ext_mesh,&
+ num_interfaces_ext_mesh, &
+ max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh, &
+ ibool_interfaces_ext_mesh, &
+ 3) ! <-- 3 == adjoint b_accel
+
+ call assemble_MPI_scalar_ext_mesh_send_cuda(NPROC, &
+ b_buffer_send_scalar_ext_mesh,b_buffer_recv_scalar_ext_mesh, &
+ num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,&
+ my_neighbours_ext_mesh, &
+ b_request_send_scalar_ext_mesh,b_request_recv_scalar_ext_mesh)
+
+ endif
+ endif
+
else
+
! waits for send/receive requests to be completed and assembles values
- call assemble_MPI_scalar_ext_mesh_w(NPROC,NGLOB_AB,potential_dot_dot_acoustic, &
+ if(.NOT. GPU_MODE) then
+ call assemble_MPI_scalar_ext_mesh_w(NPROC,NGLOB_AB,potential_dot_dot_acoustic, &
buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh,&
max_nibool_interfaces_ext_mesh, &
nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh)
+ else
+ ! on GPU
+ call assemble_MPI_scalar_ext_mesh_write_cuda(NPROC,NGLOB_AB,potential_dot_dot_acoustic, &
+ Mesh_pointer,&
+ buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh,&
+ max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh, &
+ 1)
+ endif
+
! adjoint simulations
- if( SIMULATION_TYPE == 3 ) &
- call assemble_MPI_scalar_ext_mesh_w(NPROC,NGLOB_ADJOINT,b_potential_dot_dot_acoustic, &
+ if( SIMULATION_TYPE == 3 ) then
+ if(.NOT. GPU_MODE) then
+ call assemble_MPI_scalar_ext_mesh_w(NPROC,NGLOB_ADJOINT,b_potential_dot_dot_acoustic, &
b_buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh,&
max_nibool_interfaces_ext_mesh, &
nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
b_request_send_scalar_ext_mesh,b_request_recv_scalar_ext_mesh)
+ else
+ ! on GPU
+ call assemble_MPI_scalar_ext_mesh_write_cuda(NPROC,NGLOB_AB,b_potential_dot_dot_acoustic, &
+ Mesh_pointer, &
+ b_buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh, &
+ max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+ b_request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh, &
+ 3)
+ endif
+ endif
+ endif !phase_is_inner
- endif
-
-
enddo
- ! divides pressure with mass matrix
- potential_dot_dot_acoustic(:) = potential_dot_dot_acoustic(:) * rmass_acoustic(:)
+ if(.NOT. GPU_MODE) then
+ ! divides pressure with mass matrix
+ potential_dot_dot_acoustic(:) = potential_dot_dot_acoustic(:) * rmass_acoustic(:)
- ! adjoint simulations
- if (SIMULATION_TYPE == 3) &
- b_potential_dot_dot_acoustic(:) = b_potential_dot_dot_acoustic(:) * rmass_acoustic(:)
+ ! adjoint simulations
+ if (SIMULATION_TYPE == 3) &
+ b_potential_dot_dot_acoustic(:) = b_potential_dot_dot_acoustic(:) * rmass_acoustic(:)
+ else
+ ! on GPU
+ call kernel_3_a_acoustic_cuda(Mesh_pointer,NGLOB_AB,SIMULATION_TYPE)
+ endif
if(PML) then
+ ! note: no need to transfer fields between CPU and GPU;
+ ! PML arrays are all handled on the CPU
+
! divides local contributions with mass term
call PML_acoustic_mass_update(NSPEC_AB,NGLOB_AB,&
ispec_is_acoustic,rmass_acoustic,ibool,&
@@ -283,14 +425,25 @@
!
! corrector:
! updates the chi_dot term which requires chi_dot_dot(t+delta)
- potential_dot_acoustic(:) = potential_dot_acoustic(:) + deltatover2*potential_dot_dot_acoustic(:)
+ if( .NOT. GPU_MODE ) then
+ ! corrector
+ potential_dot_acoustic(:) = potential_dot_acoustic(:) + deltatover2*potential_dot_dot_acoustic(:)
- ! adjoint simulations
- if (SIMULATION_TYPE == 3) &
- b_potential_dot_acoustic(:) = b_potential_dot_acoustic(:) + b_deltatover2*b_potential_dot_dot_acoustic(:)
+ ! adjoint simulations
+ if (SIMULATION_TYPE == 3) &
+ b_potential_dot_acoustic(:) = b_potential_dot_acoustic(:) + b_deltatover2*b_potential_dot_dot_acoustic(:)
+ else
+ ! on GPU
+ call kernel_3_b_acoustic_cuda(Mesh_pointer,NGLOB_AB,deltatover2,SIMULATION_TYPE,b_deltatover2)
+ endif
! updates potential_dot_acoustic and potential_dot_dot_acoustic inside PML region for plotting seismograms/movies
- if(PML) call PML_acoustic_update_potentials(NGLOB_AB,NSPEC_AB, &
+ if(PML) then
+ ! transfers potentials to CPU
+ if(GPU_MODE) call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+
+ call PML_acoustic_update_potentials(NGLOB_AB,NSPEC_AB, &
ibool,ispec_is_acoustic, &
potential_dot_acoustic,potential_dot_dot_acoustic,&
num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
@@ -302,21 +455,38 @@
chi1_dot,chi2_t_dot,chi3_dot,chi4_dot,&
chi1_dot_dot,chi3_dot_dot,chi4_dot_dot)
+ ! transfers potentials to GPU
+ if(GPU_MODE) call transfer_fields_acoustic_to_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+
+ endif
+
! enforces free surface (zeroes potentials at free surface)
- call acoustic_enforce_free_surface(NSPEC_AB,NGLOB_AB, &
+ if(.NOT. GPU_MODE) then
+ ! on CPU
+ call acoustic_enforce_free_surface(NSPEC_AB,NGLOB_AB, &
potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
ibool,free_surface_ijk,free_surface_ispec, &
num_free_surface_faces,ispec_is_acoustic)
- ! adjoint simulations
- if (SIMULATION_TYPE == 3) &
- call acoustic_enforce_free_surface(NSPEC_AB,NGLOB_ADJOINT, &
+ ! adjoint simulations
+ if (SIMULATION_TYPE == 3) &
+ call acoustic_enforce_free_surface(NSPEC_AB,NGLOB_ADJOINT, &
b_potential_acoustic,b_potential_dot_acoustic,b_potential_dot_dot_acoustic, &
ibool,free_surface_ijk,free_surface_ispec, &
num_free_surface_faces,ispec_is_acoustic)
+ else
+ ! on GPU
+ call acoustic_enforce_free_surface_cuda(Mesh_pointer,SIMULATION_TYPE,ABSORB_FREE_SURFACE)
+ endif
- if(PML) call PML_acoustic_enforce_free_srfc(NSPEC_AB,NGLOB_AB, &
+ if(PML) then
+ ! enforces free surface on PML elements
+ if( GPU_MODE ) call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+
+ call PML_acoustic_enforce_free_srfc(NSPEC_AB,NGLOB_AB, &
potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
ibool,free_surface_ijk,free_surface_ispec, &
num_free_surface_faces, &
@@ -327,6 +497,10 @@
chi1_dot_dot,chi2_t_dot_dot,&
chi3_dot_dot,chi4_dot_dot)
+ if( GPU_MODE ) call transfer_fields_acoustic_to_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+ endif
+
end subroutine compute_forces_acoustic
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.f90 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.f90 2011-10-06 03:31:24 UTC (rev 19027)
@@ -453,6 +453,21 @@
! acoustic coupling
if( ACOUSTIC_SIMULATION ) then
+
+ ! daniel: workaround - todo on GPU
+ ! transfers potentials to CPU
+ if(GPU_MODE) then
+ call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+ call transfer_fields_from_device(NDIM*NGLOB_AB,displ,veloc,accel,Mesh_pointer)
+ ! backward simulation
+ if( SIMULATION_TYPE == 3 ) then
+ call transfer_fields_acoustic_from_device(NGLOB_AB,b_potential_acoustic, &
+ b_potential_dot_acoustic, b_potential_dot_dot_acoustic, Mesh_pointer)
+ call transfer_fields_from_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel,Mesh_pointer)
+ endif
+ endif
+
call compute_coupling_elastic_ac(NSPEC_AB,NGLOB_AB, &
ibool,accel,potential_dot_dot_acoustic, &
num_coupling_ac_el_faces, &
@@ -470,6 +485,16 @@
coupling_ac_el_normal, &
coupling_ac_el_jacobian2Dw, &
ispec_is_inner,phase_is_inner)
+
+ ! daniel: workaround - todo on GPU
+ ! transfers potentials to CPU
+ if(GPU_MODE) then
+ ! only accel/b_accel is updated above
+ call transfer_fields_to_device(NDIM*NGLOB_AB,displ,veloc,accel, Mesh_pointer)
+ if( SIMULATION_TYPE == 3 ) &
+ call transfer_fields_to_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel, Mesh_pointer)
+ endif
+
endif
@@ -504,10 +529,10 @@
buffer_send_vector_ext_mesh,&
num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh,&
nibool_interfaces_ext_mesh, ibool_interfaces_ext_mesh,1) ! <-- 1 == fwd accel
- call assemble_MPI_vector_ext_mesh_send_cuda(NPROC,NGLOB_AB,accel, &
+ call assemble_MPI_vector_ext_mesh_send_cuda(NPROC, &
buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
+ nibool_interfaces_ext_mesh,&
my_neighbours_ext_mesh, &
request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
endif ! GPU_MODE
@@ -526,10 +551,10 @@
b_buffer_send_vector_ext_mesh,&
num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh,&
nibool_interfaces_ext_mesh, ibool_interfaces_ext_mesh,3) ! <-- 3 == adjoint b_accel
- call assemble_MPI_vector_ext_mesh_send_cuda(NPROC,NGLOB_ADJOINT,b_accel, &
+ call assemble_MPI_vector_ext_mesh_send_cuda(NPROC, &
b_buffer_send_vector_ext_mesh,b_buffer_recv_vector_ext_mesh, &
num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
- nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
+ nibool_interfaces_ext_mesh,&
my_neighbours_ext_mesh, &
b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh)
Deleted: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic_cuda.cu 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic_cuda.cu 2011-10-06 03:31:24 UTC (rev 19027)
@@ -1,978 +0,0 @@
-#include <stdio.h>
-#include <cuda.h>
-#include <cublas.h>
-#include <mpi.h>
-
-#include <sys/time.h>
-#include <sys/resource.h>
-
-#include "mesh_constants_cuda.h"
-
-// #include "epik_user.h"
-
-typedef float real;
-
-
-#define NGLL2 25
-
-__constant__ float d_hprime_xx[NGLL2];
-__constant__ float d_hprimewgll_xx[NGLL2];
-__constant__ float d_wgllwgll_xy[NGLL2];
-__constant__ float d_wgllwgll_xz[NGLL2];
-__constant__ float d_wgllwgll_yz[NGLL2];
-
-#define MAXDEBUG 1
-#define ENABLE_VERY_SLOW_ERROR_CHECKING
-
-#if MAXDEBUG == 1
-#define LOG(x) printf("%s\n",x)
-#define PRINT5(var,offset) for(;print_count<5;print_count++) printf("var(%d)=%2.20f\n",print_count,var[offset+print_count]);
-#define PRINT10(var) if(print_count<10) { printf("var=%1.20e\n",var); print_count++; }
-#define PRINT10i(var) if(print_count<10) { printf("var=%d\n",var); print_count++; }
-#else
-#define LOG(x) // printf("%s\n",x);
-#define PRINT5(var,offset) // for(i=0;i<10;i++) printf("var(%d)=%f\n",i,var[offset+i]);
-#endif
-
-void Kernel_2(int nb_blocks_to_compute, Mesh* mp, int d_iphase,
- int COMPUTE_AND_STORE_STRAIN,int SIMULATION_TYPE);
-
-
-
-double get_time()
-{
- struct timeval t;
- struct timezone tzp;
- gettimeofday(&t, &tzp);
- return t.tv_sec + t.tv_usec*1e-6;
-}
-
-
-// prepares a device array with with all inter-element edge-nodes -- this
-// is followed by a memcpy and MPI operations
-__global__ void prepare_boundary_accel_on_device(float* d_accel, float* d_send_accel_buffer,
- int num_interfaces_ext_mesh, int max_nibool_interfaces_ext_mesh,
- int* d_nibool_interfaces_ext_mesh,
- int* d_ibool_interfaces_ext_mesh) {
-
- int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
- int bx = blockIdx.y*gridDim.x+blockIdx.x;
- int tx = threadIdx.x;
- int iinterface=0;
-
- for( iinterface=0; iinterface < num_interfaces_ext_mesh; iinterface++) {
- if(id<d_nibool_interfaces_ext_mesh[iinterface]) {
- d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)] =
- d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)];
- d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+1] =
- d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+1];
- d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+2] =
- d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+2];
- }
- }
-
-}
-
-// prepares and transfers the inter-element edge-nodes to the host to be MPI'd
-extern "C" void transfer_boundary_accel_from_device_(int* size, long* Mesh_pointer_f, float* accel,
- float* send_accel_buffer,
- int* num_interfaces_ext_mesh,
- int* max_nibool_interfaces_ext_mesh,
- int* nibool_interfaces_ext_mesh,
- int* ibool_interfaces_ext_mesh,
- int* FORWARD_OR_ADJOINT)
-
-{
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
-
-
- int blocksize = 256;
- int size_padded = ((int)ceil(((double)*max_nibool_interfaces_ext_mesh)/((double)blocksize)))*blocksize;
- int num_blocks_x = size_padded/blocksize;
- int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
- num_blocks_x = ceil(num_blocks_x/2.0);
- num_blocks_y = num_blocks_y*2;
- }
-
- dim3 grid(num_blocks_x,num_blocks_y);
- dim3 threads(blocksize,1,1);
-
- //timing for memory xfer
- // cudaEvent_t start, stop;
- // float time;
- // cudaEventCreate(&start);
- // cudaEventCreate(&stop);
- // cudaEventRecord( start, 0 );
- if(*FORWARD_OR_ADJOINT == 1) {
- prepare_boundary_accel_on_device<<<grid,threads>>>(mp->d_accel,mp->d_send_accel_buffer,
- *num_interfaces_ext_mesh,
- *max_nibool_interfaces_ext_mesh,
- mp->d_nibool_interfaces_ext_mesh,
- mp->d_ibool_interfaces_ext_mesh);
- }
- else if(*FORWARD_OR_ADJOINT == 3) {
- prepare_boundary_accel_on_device<<<grid,threads>>>(mp->d_b_accel,mp->d_send_accel_buffer,
- *num_interfaces_ext_mesh,
- *max_nibool_interfaces_ext_mesh,
- mp->d_nibool_interfaces_ext_mesh,
- mp->d_ibool_interfaces_ext_mesh);
- }
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("prepare_adjoint_constants_device");
- // // sync and check to catch errors from previous async operations
- // cudaThreadSynchronize();
-
- // // printf("Elapsed time for xfer d->h: %f\n",end-start);
- // cudaError_t err = cudaGetLastError();
- // if (err != cudaSuccess)
- // {
- // fprintf(stderr,"Error launching/running prepare_boundary_kernel: %s\n", cudaGetErrorString(err));
- // exit(1);
- // }
-#endif
-
-
- cudaMemcpy(send_accel_buffer,mp->d_send_accel_buffer,
- 3* *max_nibool_interfaces_ext_mesh* *num_interfaces_ext_mesh*sizeof(real),cudaMemcpyDeviceToHost);
-
- // finish timing of kernel+memcpy
- // cudaEventRecord( stop, 0 );
- // cudaEventSynchronize( stop );
- // cudaEventElapsedTime( &time, start, stop );
- // cudaEventDestroy( start );
- // cudaEventDestroy( stop );
- // printf("boundary xfer d->h Time: %f ms\n",time);
-
-}
-__global__ void assemble_boundary_accel_on_device(float* d_accel, float* d_send_accel_buffer,
- int num_interfaces_ext_mesh, int max_nibool_interfaces_ext_mesh,
- int* d_nibool_interfaces_ext_mesh,
- int* d_ibool_interfaces_ext_mesh) {
-
- int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
- int bx = blockIdx.y*gridDim.x+blockIdx.x;
- int tx = threadIdx.x;
- int iinterface=0;
-
- for( iinterface=0; iinterface < num_interfaces_ext_mesh; iinterface++) {
- if(id<d_nibool_interfaces_ext_mesh[iinterface]) {
-
- // for testing atomic operations against not atomic operations (0.1ms vs. 0.04 ms)
- // d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)] +=
- // d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)];
- // d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+1] +=
- // d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+1];
- // d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+2] +=
- // d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+2];
-
-
- atomicAdd(&d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)],
- d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)]);
- atomicAdd(&d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+1],
- d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+1]);
- atomicAdd(&d_accel[3*(d_ibool_interfaces_ext_mesh[id+max_nibool_interfaces_ext_mesh*iinterface]-1)+2],
- d_send_accel_buffer[3*(id + max_nibool_interfaces_ext_mesh*iinterface)+2]);
- }
- }
- // ! This step is done via previous function transfer_and_assemble...
- // ! do iinterface = 1, num_interfaces_ext_mesh
- // ! do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
- // ! array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface)) = &
- // ! array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface)) + buffer_recv_vector_ext_mesh(:,ipoin,iinterface)
- // ! enddo
- // ! enddo
-}
-
-
-// FORWARD_OR_ADJOINT == 1 for accel, and == 3 for b_accel
-extern "C"
-void transfer_and_assemble_accel_to_device_(long* Mesh_pointer, real* accel,
- real* buffer_recv_vector_ext_mesh,
- int* num_interfaces_ext_mesh,
- int* max_nibool_interfaces_ext_mesh,
- int* nibool_interfaces_ext_mesh,
- int* ibool_interfaces_ext_mesh,int* FORWARD_OR_ADJOINT) {
- Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
-
- cudaMemcpy(mp->d_send_accel_buffer, buffer_recv_vector_ext_mesh, 3* *max_nibool_interfaces_ext_mesh* *num_interfaces_ext_mesh*sizeof(real), cudaMemcpyHostToDevice);
-
- int blocksize = 256;
- int size_padded = ((int)ceil(((double)*max_nibool_interfaces_ext_mesh)/((double)blocksize)))*blocksize;
- int num_blocks_x = size_padded/blocksize;
- int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
- num_blocks_x = ceil(num_blocks_x/2.0);
- num_blocks_y = num_blocks_y*2;
- }
-
- double start_time = get_time();
- dim3 grid(num_blocks_x,num_blocks_y);
- dim3 threads(blocksize,1,1);
- // cudaEvent_t start, stop;
- // float time;
- // cudaEventCreate(&start);
- // cudaEventCreate(&stop);
- // cudaEventRecord( start, 0 );
- if(*FORWARD_OR_ADJOINT == 1) { //assemble forward accel
- assemble_boundary_accel_on_device<<<grid,threads>>>(mp->d_accel, mp->d_send_accel_buffer,
- *num_interfaces_ext_mesh,
- *max_nibool_interfaces_ext_mesh,
- mp->d_nibool_interfaces_ext_mesh,
- mp->d_ibool_interfaces_ext_mesh);
- }
- else if(*FORWARD_OR_ADJOINT == 3) { //assemble adjoint accel
- assemble_boundary_accel_on_device<<<grid,threads>>>(mp->d_b_accel, mp->d_send_accel_buffer,
- *num_interfaces_ext_mesh,
- *max_nibool_interfaces_ext_mesh,
- mp->d_nibool_interfaces_ext_mesh,
- mp->d_ibool_interfaces_ext_mesh);
- }
-
- // cudaEventRecord( stop, 0 );
- // cudaEventSynchronize( stop );
- // cudaEventElapsedTime( &time, start, stop );
- // cudaEventDestroy( start );
- // cudaEventDestroy( stop );
- // printf("Boundary Assemble Kernel Execution Time: %f ms\n",time);
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- // sync and check to catch errors from previous async operations
- cudaThreadSynchronize();
- double end_time = get_time();
- //printf("Elapsed time: %e\n",end_time-start_time);
- cudaError_t err = cudaGetLastError();
- if (err != cudaSuccess)
- {
- fprintf(stderr,"Error launching/running prepare_boundary_kernel: %s\n", cudaGetErrorString(err));
- exit(1);
- }
-#endif
-
-}
-
-
-
-
-
-extern "C" void compute_forces_elastic_cuda_(long* Mesh_pointer_f,
- int* iphase,
- int* nspec_outer_elastic,
- int* nspec_inner_elastic,
- int* COMPUTE_AND_STORE_STRAIN,
- int* SIMULATION_TYPE) {
-
- // EPIK_TRACER("compute_forces_elastic_cuda");
- //printf("Running compute_forces\n");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
-
- int num_elements;
-
- if( *iphase == 1 )
- num_elements = *nspec_outer_elastic;
- else
- num_elements = *nspec_inner_elastic;
- int myrank;
-
- /* MPI_Comm_rank(MPI_COMM_WORLD,&myrank); */
- /* if(myrank==0) { */
-
-
-
- Kernel_2(num_elements, mp, *iphase, *COMPUTE_AND_STORE_STRAIN,*SIMULATION_TYPE);
-
-
- cudaThreadSynchronize();
-/* MPI_Barrier(MPI_COMM_WORLD); */
-}
-
-__global__ void Kernel_test(float* d_debug_output,int* d_phase_ispec_inner_elastic, int num_phase_ispec_elastic, int d_iphase, int* d_ibool);
-
-__global__ void Kernel_2_impl(int nb_blocks_to_compute,int NGLOB, int* d_ibool,int* d_phase_ispec_inner_elastic, int num_phase_ispec_elastic, int d_iphase,float* d_displ, float* d_accel, float* d_xix, float* d_xiy, float* d_xiz, float* d_etax, float* d_etay, float* d_etaz, float* d_gammax, float* d_gammay, float* d_gammaz, float* d_kappav, float* d_muv,float* d_debug,int COMPUTE_AND_STORE_STRAIN,float* epsilondev_xx,float* epsilondev_yy,float* epsilondev_xy,float* epsilondev_xz,float* epsilondev_yz,float* epsilon_trace_over_3,int SIMULATION_TYPE);
-
-void Kernel_2(int nb_blocks_to_compute, Mesh* mp, int d_iphase,
- int COMPUTE_AND_STORE_STRAIN,int SIMULATION_TYPE)
- {
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- // sync and check to catch errors from previous async operations
- cudaThreadSynchronize();
- cudaError_t err = cudaGetLastError();
- if (err != cudaSuccess)
- {
- fprintf(stderr,"Error before kernel Kernel 2: %s\n", cudaGetErrorString(err));
- exit(1);
- }
-#endif
-
- /* if the grid can handle the number of blocks, we let it be 1D */
- /* grid_2_x = nb_elem_color; */
- /* nb_elem_color is just how many blocks we are computing now */
-
- int num_blocks_x = nb_blocks_to_compute;
- int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
- num_blocks_x = ceil(num_blocks_x/2.0);
- num_blocks_y = num_blocks_y*2;
- }
-
- int threads_2 = 128;//BLOCK_SIZE_K2;
- dim3 grid_2(num_blocks_x,num_blocks_y);
-
- // debugging
- //printf("Starting with grid %dx%d for %d blocks\n",num_blocks_x,num_blocks_y,nb_blocks_to_compute);
- float* d_debug, *h_debug;
- h_debug = (float*)calloc(128,sizeof(float));
- cudaMalloc((void**)&d_debug,128*sizeof(float));
- cudaMemcpy(d_debug,h_debug,128*sizeof(float),cudaMemcpyHostToDevice);
-
- // Cuda timing
- // cudaEvent_t start, stop;
- // float time;
- // cudaEventCreate(&start);
- // cudaEventCreate(&stop);
- // cudaEventRecord( start, 0 );
-
- Kernel_2_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,mp->NGLOB_AB, mp->d_ibool,
- mp->d_phase_ispec_inner_elastic,
- mp->d_num_phase_ispec_elastic, d_iphase,
- mp->d_displ, mp->d_accel,
- mp->d_xix, mp->d_xiy, mp->d_xiz,
- mp->d_etax, mp->d_etay, mp->d_etaz,
- mp->d_gammax, mp->d_gammay, mp->d_gammaz,
- mp->d_kappav, mp->d_muv,d_debug,
- COMPUTE_AND_STORE_STRAIN,
- mp->d_epsilondev_xx,
- mp->d_epsilondev_yy,
- mp->d_epsilondev_xy,
- mp->d_epsilondev_xz,
- mp->d_epsilondev_yz,
- mp->d_epsilon_trace_over_3,
- // 1);
- SIMULATION_TYPE);
-
-
- // cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
- // int procid;
- // MPI_Comm_rank(MPI_COMM_WORLD,&procid);
- // if(procid==0) {
- // for(int i=0;i<17;i++) {
- // printf("cudadebug[%d] = %e\n",i,h_debug[i]);
- // }
- // }
- free(h_debug);
- cudaFree(d_debug);
- #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("Kernel_2_impl");
- #endif
-
- if(SIMULATION_TYPE == 3) {
- Kernel_2_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,mp->NGLOB_AB, mp->d_ibool,
- mp->d_phase_ispec_inner_elastic,
- mp->d_num_phase_ispec_elastic, d_iphase,
- mp->d_b_displ, mp->d_b_accel,
- mp->d_xix, mp->d_xiy, mp->d_xiz,
- mp->d_etax, mp->d_etay, mp->d_etaz,
- mp->d_gammax, mp->d_gammay, mp->d_gammaz,
- mp->d_kappav, mp->d_muv,d_debug,
- COMPUTE_AND_STORE_STRAIN,
- mp->d_b_epsilondev_xx,
- mp->d_b_epsilondev_yy,
- mp->d_b_epsilondev_xy,
- mp->d_b_epsilondev_xz,
- mp->d_b_epsilondev_yz,
- mp->d_b_epsilon_trace_over_3,
- SIMULATION_TYPE);
- }
-
- // cudaEventRecord( stop, 0 );
- // cudaEventSynchronize( stop );
- // cudaEventElapsedTime( &time, start, stop );
- // cudaEventDestroy( start );
- // cudaEventDestroy( stop );
- // printf("Kernel2 Execution Time: %f ms\n",time);
-
- // cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
- // for(int i=0;i<10;i++) {
- // printf("debug[%d]=%e\n",i,h_debug[i]);
- // }
-
- /* cudaThreadSynchronize(); */
- /* LOG("Kernel 2 finished"); */
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("Kernel_2_impl SIM_TYPE==3");
- #endif
-
- }
-
-__global__ void Kernel_test(float* d_debug_output,int* d_phase_ispec_inner_elastic, int num_phase_ispec_elastic, int d_iphase, int* d_ibool) {
- int bx = blockIdx.x;
- int tx = threadIdx.x;
- int working_element;
- int ispec;
- int NGLL3_ALIGN = 128;
- if(tx==0 && bx==0) {
-
- d_debug_output[0] = 420.0;
-
- d_debug_output[2] = num_phase_ispec_elastic;
- d_debug_output[3] = d_iphase;
- working_element = d_phase_ispec_inner_elastic[bx + num_phase_ispec_elastic*(d_iphase-1)]-1;
- d_debug_output[4] = working_element;
- d_debug_output[5] = d_phase_ispec_inner_elastic[0];
- /* d_debug_output[1] = d_ibool[working_element*NGLL3_ALIGN + tx]-1; */
- }
- /* d_debug_output[1+tx+128*bx] = 69.0; */
-
-}
-
-// double precision temporary variables leads to 10% performance
-// decrease in Kernel_2_impl (not very much..)
-typedef float reald;
-
-// doesn't seem to change the performance.
-// #define MANUALLY_UNROLLED_LOOPS
-
-__global__ void Kernel_2_impl(int nb_blocks_to_compute,int NGLOB, int* d_ibool,int* d_phase_ispec_inner_elastic, int num_phase_ispec_elastic, int d_iphase,float* d_displ, float* d_accel, float* d_xix, float* d_xiy, float* d_xiz, float* d_etax, float* d_etay, float* d_etaz, float* d_gammax, float* d_gammay, float* d_gammaz, float* d_kappav, float* d_muv,float* d_debug,int COMPUTE_AND_STORE_STRAIN,float* epsilondev_xx,float* epsilondev_yy,float* epsilondev_xy,float* epsilondev_xz,float* epsilondev_yz,float* epsilon_trace_over_3,int SIMULATION_TYPE)
-{
-
- /* int bx = blockIdx.y*blockDim.x+blockIdx.x; //possible bug in original code*/
- int bx = blockIdx.y*gridDim.x+blockIdx.x;
- /* int bx = blockIdx.x; */
- int tx = threadIdx.x;
-
-
-
- const int NGLLX = 5;
- /* const int NGLL2 = 25; */
- const int NGLL3 = 125;
- const int NGLL3_ALIGN = 128;
-
- int K = (tx/NGLL2);
- int J = ((tx-K*NGLL2)/NGLLX);
- int I = (tx-K*NGLL2-J*NGLLX);
-
- int active,offset;
- int iglob = 0;
- int working_element;
- reald tempx1l,tempx2l,tempx3l,tempy1l,tempy2l,tempy3l,tempz1l,tempz2l,tempz3l;
- reald xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl;
- reald duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl;
- reald duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl;
- reald duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl;
- reald fac1,fac2,fac3,lambdal,mul,lambdalplus2mul,kappal;
- reald sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz;
-
-#ifndef MANUALLY_UNROLLED_LOOPS
- int l;
- float hp1,hp2,hp3;
-#endif
-
- __shared__ reald s_dummyx_loc[NGLL3];
- __shared__ reald s_dummyy_loc[NGLL3];
- __shared__ reald s_dummyz_loc[NGLL3];
-
- __shared__ reald s_tempx1[NGLL3];
- __shared__ reald s_tempx2[NGLL3];
- __shared__ reald s_tempx3[NGLL3];
- __shared__ reald s_tempy1[NGLL3];
- __shared__ reald s_tempy2[NGLL3];
- __shared__ reald s_tempy3[NGLL3];
- __shared__ reald s_tempz1[NGLL3];
- __shared__ reald s_tempz2[NGLL3];
- __shared__ reald s_tempz3[NGLL3];
-
-// use only NGLL^3 = 125 active threads, plus 3 inactive/ghost threads,
-// because we used memory padding from NGLL^3 = 125 to 128 to get coalescent memory accesses
- active = (tx < NGLL3 && bx < nb_blocks_to_compute) ? 1:0;
-
-// copy from global memory to shared memory
-// each thread writes one of the NGLL^3 = 125 data points
- if (active) {
- // iphase-1 and working_element-1 for Fortran->C array conventions
- working_element = d_phase_ispec_inner_elastic[bx + num_phase_ispec_elastic*(d_iphase-1)]-1;
- // iglob = d_ibool[working_element*NGLL3_ALIGN + tx]-1;
- iglob = d_ibool[working_element*125 + tx]-1;
-
-#ifdef USE_TEXTURES
- s_dummyx_loc[tx] = tex1Dfetch(tex_displ, iglob);
- s_dummyy_loc[tx] = tex1Dfetch(tex_displ, iglob + NGLOB);
- s_dummyz_loc[tx] = tex1Dfetch(tex_displ, iglob + 2*NGLOB);
-#else
- // changing iglob indexing to match fortran row changes fast style
- s_dummyx_loc[tx] = d_displ[iglob*3];
- s_dummyy_loc[tx] = d_displ[iglob*3 + 1];
- s_dummyz_loc[tx] = d_displ[iglob*3 + 2];
-#endif
- }
-
-// synchronize all the threads (one thread for each of the NGLL grid points of the
-// current spectral element) because we need the whole element to be ready in order
-// to be able to compute the matrix products along cut planes of the 3D element below
- __syncthreads();
-
-#ifndef MAKE_KERNEL2_BECOME_STUPID_FOR_TESTS
-
- if (active) {
-
-#ifndef MANUALLY_UNROLLED_LOOPS
-
- tempx1l = 0.f;
- tempx2l = 0.f;
- tempx3l = 0.f;
-
- tempy1l = 0.f;
- tempy2l = 0.f;
- tempy3l = 0.f;
-
- tempz1l = 0.f;
- tempz2l = 0.f;
- tempz3l = 0.f;
-
- for (l=0;l<NGLLX;l++) {
- hp1 = d_hprime_xx[l*NGLLX+I];
- offset = K*NGLL2+J*NGLLX+l;
- tempx1l += s_dummyx_loc[offset]*hp1;
- tempy1l += s_dummyy_loc[offset]*hp1;
- tempz1l += s_dummyz_loc[offset]*hp1;
-
- hp2 = d_hprime_xx[l*NGLLX+J];
- offset = K*NGLL2+l*NGLLX+I;
- tempx2l += s_dummyx_loc[offset]*hp2;
- tempy2l += s_dummyy_loc[offset]*hp2;
- tempz2l += s_dummyz_loc[offset]*hp2;
-
- hp3 = d_hprime_xx[l*NGLLX+K];
- offset = l*NGLL2+J*NGLLX+I;
- tempx3l += s_dummyx_loc[offset]*hp3;
- tempy3l += s_dummyy_loc[offset]*hp3;
- tempz3l += s_dummyz_loc[offset]*hp3;
-
- // if(working_element == 169 && tx == 0) {
- // atomicAdd(&d_debug[0],1.0);
- // d_debug[1+3*l] = tempz3l;
- // d_debug[2+3*l] = s_dummyz_loc[offset];
- // d_debug[3+3*l] = hp3;
- // }
-
- }
-#else
-
- tempx1l = s_dummyx_loc[K*NGLL2+J*NGLLX]*d_hprime_xx[I]
- + s_dummyx_loc[K*NGLL2+J*NGLLX+1]*d_hprime_xx[NGLLX+I]
- + s_dummyx_loc[K*NGLL2+J*NGLLX+2]*d_hprime_xx[2*NGLLX+I]
- + s_dummyx_loc[K*NGLL2+J*NGLLX+3]*d_hprime_xx[3*NGLLX+I]
- + s_dummyx_loc[K*NGLL2+J*NGLLX+4]*d_hprime_xx[4*NGLLX+I];
-
- tempy1l = s_dummyy_loc[K*NGLL2+J*NGLLX]*d_hprime_xx[I]
- + s_dummyy_loc[K*NGLL2+J*NGLLX+1]*d_hprime_xx[NGLLX+I]
- + s_dummyy_loc[K*NGLL2+J*NGLLX+2]*d_hprime_xx[2*NGLLX+I]
- + s_dummyy_loc[K*NGLL2+J*NGLLX+3]*d_hprime_xx[3*NGLLX+I]
- + s_dummyy_loc[K*NGLL2+J*NGLLX+4]*d_hprime_xx[4*NGLLX+I];
-
- tempz1l = s_dummyz_loc[K*NGLL2+J*NGLLX]*d_hprime_xx[I]
- + s_dummyz_loc[K*NGLL2+J*NGLLX+1]*d_hprime_xx[NGLLX+I]
- + s_dummyz_loc[K*NGLL2+J*NGLLX+2]*d_hprime_xx[2*NGLLX+I]
- + s_dummyz_loc[K*NGLL2+J*NGLLX+3]*d_hprime_xx[3*NGLLX+I]
- + s_dummyz_loc[K*NGLL2+J*NGLLX+4]*d_hprime_xx[4*NGLLX+I];
-
- tempx2l = s_dummyx_loc[K*NGLL2+I]*d_hprime_xx[J]
- + s_dummyx_loc[K*NGLL2+NGLLX+I]*d_hprime_xx[NGLLX+J]
- + s_dummyx_loc[K*NGLL2+2*NGLLX+I]*d_hprime_xx[2*NGLLX+J]
- + s_dummyx_loc[K*NGLL2+3*NGLLX+I]*d_hprime_xx[3*NGLLX+J]
- + s_dummyx_loc[K*NGLL2+4*NGLLX+I]*d_hprime_xx[4*NGLLX+J];
-
- tempy2l = s_dummyy_loc[K*NGLL2+I]*d_hprime_xx[J]
- + s_dummyy_loc[K*NGLL2+NGLLX+I]*d_hprime_xx[NGLLX+J]
- + s_dummyy_loc[K*NGLL2+2*NGLLX+I]*d_hprime_xx[2*NGLLX+J]
- + s_dummyy_loc[K*NGLL2+3*NGLLX+I]*d_hprime_xx[3*NGLLX+J]
- + s_dummyy_loc[K*NGLL2+4*NGLLX+I]*d_hprime_xx[4*NGLLX+J];
-
- tempz2l = s_dummyz_loc[K*NGLL2+I]*d_hprime_xx[J]
- + s_dummyz_loc[K*NGLL2+NGLLX+I]*d_hprime_xx[NGLLX+J]
- + s_dummyz_loc[K*NGLL2+2*NGLLX+I]*d_hprime_xx[2*NGLLX+J]
- + s_dummyz_loc[K*NGLL2+3*NGLLX+I]*d_hprime_xx[3*NGLLX+J]
- + s_dummyz_loc[K*NGLL2+4*NGLLX+I]*d_hprime_xx[4*NGLLX+J];
-
- tempx3l = s_dummyx_loc[J*NGLLX+I]*d_hprime_xx[K]
- + s_dummyx_loc[NGLL2+J*NGLLX+I]*d_hprime_xx[NGLLX+K]
- + s_dummyx_loc[2*NGLL2+J*NGLLX+I]*d_hprime_xx[2*NGLLX+K]
- + s_dummyx_loc[3*NGLL2+J*NGLLX+I]*d_hprime_xx[3*NGLLX+K]
- + s_dummyx_loc[4*NGLL2+J*NGLLX+I]*d_hprime_xx[4*NGLLX+K];
-
- tempy3l = s_dummyy_loc[J*NGLLX+I]*d_hprime_xx[K]
- + s_dummyy_loc[NGLL2+J*NGLLX+I]*d_hprime_xx[NGLLX+K]
- + s_dummyy_loc[2*NGLL2+J*NGLLX+I]*d_hprime_xx[2*NGLLX+K]
- + s_dummyy_loc[3*NGLL2+J*NGLLX+I]*d_hprime_xx[3*NGLLX+K]
- + s_dummyy_loc[4*NGLL2+J*NGLLX+I]*d_hprime_xx[4*NGLLX+K];
-
- tempz3l = s_dummyz_loc[J*NGLLX+I]*d_hprime_xx[K]
- + s_dummyz_loc[NGLL2+J*NGLLX+I]*d_hprime_xx[NGLLX+K]
- + s_dummyz_loc[2*NGLL2+J*NGLLX+I]*d_hprime_xx[2*NGLLX+K]
- + s_dummyz_loc[3*NGLL2+J*NGLLX+I]*d_hprime_xx[3*NGLLX+K]
- + s_dummyz_loc[4*NGLL2+J*NGLLX+I]*d_hprime_xx[4*NGLLX+K];
-
-#endif
-
-// compute derivatives of ux, uy and uz with respect to x, y and z
- offset = working_element*NGLL3_ALIGN + tx;
-
- xixl = d_xix[offset];
- xiyl = d_xiy[offset];
- xizl = d_xiz[offset];
- etaxl = d_etax[offset];
- etayl = d_etay[offset];
- etazl = d_etaz[offset];
- gammaxl = d_gammax[offset];
- gammayl = d_gammay[offset];
- gammazl = d_gammaz[offset];
-
- duxdxl = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l;
- duxdyl = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l;
- duxdzl = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l;
-
- duydxl = xixl*tempy1l + etaxl*tempy2l + gammaxl*tempy3l;
- duydyl = xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l;
- duydzl = xizl*tempy1l + etazl*tempy2l + gammazl*tempy3l;
-
- duzdxl = xixl*tempz1l + etaxl*tempz2l + gammaxl*tempz3l;
- duzdyl = xiyl*tempz1l + etayl*tempz2l + gammayl*tempz3l;
- duzdzl = xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l;
-
-
-
- duxdxl_plus_duydyl = duxdxl + duydyl;
- duxdxl_plus_duzdzl = duxdxl + duzdzl;
- duydyl_plus_duzdzl = duydyl + duzdzl;
- duxdyl_plus_duydxl = duxdyl + duydxl;
- duzdxl_plus_duxdzl = duzdxl + duxdzl;
- duzdyl_plus_duydzl = duzdyl + duydzl;
-
- if(COMPUTE_AND_STORE_STRAIN) {
- float templ = 1.0f/3.0f * (duxdxl + duydyl + duzdzl);
- epsilondev_xx[offset] = duxdxl - templ;
- epsilondev_yy[offset] = duydyl - templ;
- epsilondev_xy[offset] = 0.5 * duxdyl_plus_duydxl;
- epsilondev_xz[offset] = 0.5 * duzdxl_plus_duxdzl;
- epsilondev_yz[offset] = 0.5 * duzdyl_plus_duydzl;
- if(SIMULATION_TYPE == 3) {
- epsilon_trace_over_3[tx + working_element*125] = templ;
- }
- }
-
-// compute elements with an elastic isotropic rheology
- kappal = d_kappav[offset];
- mul = d_muv[offset];
-
- lambdalplus2mul = kappal + 1.33333333333333333333f * mul; // 4./3. = 1.3333333
- lambdal = lambdalplus2mul - 2.f*mul;
-
-// compute the six components of the stress tensor sigma
- sigma_xx = lambdalplus2mul*duxdxl + lambdal*duydyl_plus_duzdzl;
- sigma_yy = lambdalplus2mul*duydyl + lambdal*duxdxl_plus_duzdzl;
- sigma_zz = lambdalplus2mul*duzdzl + lambdal*duxdxl_plus_duydyl;
-
- sigma_xy = mul*duxdyl_plus_duydxl;
- sigma_xz = mul*duzdxl_plus_duxdzl;
- sigma_yz = mul*duzdyl_plus_duydzl;
-
- jacobianl = 1.f / (xixl*(etayl*gammazl-etazl*gammayl)-xiyl*(etaxl*gammazl-etazl*gammaxl)+xizl*(etaxl*gammayl-etayl*gammaxl));
-
-// form the dot product with the test vector
- s_tempx1[tx] = jacobianl * (sigma_xx*xixl + sigma_xy*xiyl + sigma_xz*xizl);
- s_tempy1[tx] = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_yz*xizl);
- s_tempz1[tx] = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl);
-
- s_tempx2[tx] = jacobianl * (sigma_xx*etaxl + sigma_xy*etayl + sigma_xz*etazl);
- s_tempy2[tx] = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_yz*etazl);
- s_tempz2[tx] = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl);
-
- s_tempx3[tx] = jacobianl * (sigma_xx*gammaxl + sigma_xy*gammayl + sigma_xz*gammazl);
- s_tempy3[tx] = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_yz*gammazl);
- s_tempz3[tx] = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl);
-
- }
-
-// synchronize all the threads (one thread for each of the NGLL grid points of the
-// current spectral element) because we need the whole element to be ready in order
-// to be able to compute the matrix products along cut planes of the 3D element below
- __syncthreads();
-
- if (active) {
-
-#ifndef MANUALLY_UNROLLED_LOOPS
-
- tempx1l = 0.f;
- tempy1l = 0.f;
- tempz1l = 0.f;
-
- tempx2l = 0.f;
- tempy2l = 0.f;
- tempz2l = 0.f;
-
- tempx3l = 0.f;
- tempy3l = 0.f;
- tempz3l = 0.f;
-
- for (l=0;l<NGLLX;l++) {
-
- fac1 = d_hprimewgll_xx[I*NGLLX+l];
- offset = K*NGLL2+J*NGLLX+l;
- tempx1l += s_tempx1[offset]*fac1;
- tempy1l += s_tempy1[offset]*fac1;
- tempz1l += s_tempz1[offset]*fac1;
-
- fac2 = d_hprimewgll_xx[J*NGLLX+l];
- offset = K*NGLL2+l*NGLLX+I;
- tempx2l += s_tempx2[offset]*fac2;
- tempy2l += s_tempy2[offset]*fac2;
- tempz2l += s_tempz2[offset]*fac2;
-
- fac3 = d_hprimewgll_xx[K*NGLLX+l];
- offset = l*NGLL2+J*NGLLX+I;
- tempx3l += s_tempx3[offset]*fac3;
- tempy3l += s_tempy3[offset]*fac3;
- tempz3l += s_tempz3[offset]*fac3;
-
- if(working_element == 169)
- if(l==0)
- if(I+J+K == 0) {
- // atomicAdd(&d_debug[0],1.0);
- // d_debug[0] = fac3;
- // d_debug[1] = offset;
- // d_debug[2] = s_tempz3[offset];
- }
- }
-#else
-
- tempx1l = s_tempx1[K*NGLL2+J*NGLLX]*d_hprimewgll_xx[I*NGLLX]
- + s_tempx1[K*NGLL2+J*NGLLX+1]*d_hprimewgll_xx[I*NGLLX+1]
- + s_tempx1[K*NGLL2+J*NGLLX+2]*d_hprimewgll_xx[I*NGLLX+2]
- + s_tempx1[K*NGLL2+J*NGLLX+3]*d_hprimewgll_xx[I*NGLLX+3]
- + s_tempx1[K*NGLL2+J*NGLLX+4]*d_hprimewgll_xx[I*NGLLX+4];
-
- tempy1l = s_tempy1[K*NGLL2+J*NGLLX]*d_hprimewgll_xx[I*NGLLX]
- + s_tempy1[K*NGLL2+J*NGLLX+1]*d_hprimewgll_xx[I*NGLLX+1]
- + s_tempy1[K*NGLL2+J*NGLLX+2]*d_hprimewgll_xx[I*NGLLX+2]
- + s_tempy1[K*NGLL2+J*NGLLX+3]*d_hprimewgll_xx[I*NGLLX+3]
- + s_tempy1[K*NGLL2+J*NGLLX+4]*d_hprimewgll_xx[I*NGLLX+4];
-
- tempz1l = s_tempz1[K*NGLL2+J*NGLLX]*d_hprimewgll_xx[I*NGLLX]
- + s_tempz1[K*NGLL2+J*NGLLX+1]*d_hprimewgll_xx[I*NGLLX+1]
- + s_tempz1[K*NGLL2+J*NGLLX+2]*d_hprimewgll_xx[I*NGLLX+2]
- + s_tempz1[K*NGLL2+J*NGLLX+3]*d_hprimewgll_xx[I*NGLLX+3]
- + s_tempz1[K*NGLL2+J*NGLLX+4]*d_hprimewgll_xx[I*NGLLX+4];
-
- tempx2l = s_tempx2[K*NGLL2+I]*d_hprimewgll_xx[J*NGLLX]
- + s_tempx2[K*NGLL2+NGLLX+I]*d_hprimewgll_xx[J*NGLLX+1]
- + s_tempx2[K*NGLL2+2*NGLLX+I]*d_hprimewgll_xx[J*NGLLX+2]
- + s_tempx2[K*NGLL2+3*NGLLX+I]*d_hprimewgll_xx[J*NGLLX+3]
- + s_tempx2[K*NGLL2+4*NGLLX+I]*d_hprimewgll_xx[J*NGLLX+4];
-
- tempy2l = s_tempy2[K*NGLL2+I]*d_hprimewgll_xx[J*NGLLX]
- + s_tempy2[K*NGLL2+NGLLX+I]*d_hprimewgll_xx[J*NGLLX+1]
- + s_tempy2[K*NGLL2+2*NGLLX+I]*d_hprimewgll_xx[J*NGLLX+2]
- + s_tempy2[K*NGLL2+3*NGLLX+I]*d_hprimewgll_xx[J*NGLLX+3]
- + s_tempy2[K*NGLL2+4*NGLLX+I]*d_hprimewgll_xx[J*NGLLX+4];
-
- tempz2l = s_tempz2[K*NGLL2+I]*d_hprimewgll_xx[J*NGLLX]
- + s_tempz2[K*NGLL2+NGLLX+I]*d_hprimewgll_xx[J*NGLLX+1]
- + s_tempz2[K*NGLL2+2*NGLLX+I]*d_hprimewgll_xx[J*NGLLX+2]
- + s_tempz2[K*NGLL2+3*NGLLX+I]*d_hprimewgll_xx[J*NGLLX+3]
- + s_tempz2[K*NGLL2+4*NGLLX+I]*d_hprimewgll_xx[J*NGLLX+4];
-
- tempx3l = s_tempx3[J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX]
- + s_tempx3[NGLL2+J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX+1]
- + s_tempx3[2*NGLL2+J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX+2]
- + s_tempx3[3*NGLL2+J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX+3]
- + s_tempx3[4*NGLL2+J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX+4];
-
- tempy3l = s_tempy3[J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX]
- + s_tempy3[NGLL2+J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX+1]
- + s_tempy3[2*NGLL2+J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX+2]
- + s_tempy3[3*NGLL2+J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX+3]
- + s_tempy3[4*NGLL2+J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX+4];
-
- tempz3l = s_tempz3[J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX]
- + s_tempz3[NGLL2+J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX+1]
- + s_tempz3[2*NGLL2+J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX+2]
- + s_tempz3[3*NGLL2+J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX+3]
- + s_tempz3[4*NGLL2+J*NGLLX+I]*d_hprimewgll_xx[K*NGLLX+4];
-
-#endif
-
- fac1 = d_wgllwgll_yz[K*NGLLX+J];
- fac2 = d_wgllwgll_xz[K*NGLLX+I];
- fac3 = d_wgllwgll_xy[J*NGLLX+I];
-
-#ifdef USE_TEXTURES
- d_accel[iglob] = tex1Dfetch(tex_accel, iglob) - (fac1*tempx1l + fac2*tempx2l + fac3*tempx3l);
- d_accel[iglob + NGLOB] = tex1Dfetch(tex_accel, iglob + NGLOB) - (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l);
- d_accel[iglob + 2*NGLOB] = tex1Dfetch(tex_accel, iglob + 2*NGLOB) - (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
-#else
- /* OLD/To be implemented version that uses coloring to get around race condition. About 1.6x faster */
- // d_accel[iglob*3] -= (fac1*tempx1l + fac2*tempx2l + fac3*tempx3l);
- // d_accel[iglob*3 + 1] -= (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l);
- // d_accel[iglob*3 + 2] -= (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
-
- if(iglob*3+2 == 41153) {
- // int ot = d_debug[5];
- // d_debug[0+1+ot] = d_accel[iglob*3+2];
- // // d_debug[1+1+ot] = fac1*tempz1l;
- // // d_debug[2+1+ot] = fac2*tempz2l;
- // // d_debug[3+1+ot] = fac3*tempz3l;
- // d_debug[1+1+ot] = fac1;
- // d_debug[2+1+ot] = fac2;
- // d_debug[3+1+ot] = fac3;
- // d_debug[4+1+ot] = d_accel[iglob*3+2]-(fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
- // atomicAdd(&d_debug[0],1.0);
- // d_debug[6+ot] = d_displ[iglob*3+2];
- }
-
- atomicAdd(&d_accel[iglob*3],-(fac1*tempx1l + fac2*tempx2l + fac3*tempx3l));
- atomicAdd(&d_accel[iglob*3+1],-(fac1*tempy1l + fac2*tempy2l + fac3*tempy3l));
- atomicAdd(&d_accel[iglob*3+2],-(fac1*tempz1l + fac2*tempz2l + fac3*tempz3l));
-
-#endif
- }
-
-#else // of #ifndef MAKE_KERNEL2_BECOME_STUPID_FOR_TESTS
- d_accel[iglob] -= 0.00000001f;
- d_accel[iglob + NGLOB] -= 0.00000001f;
- d_accel[iglob + 2*NGLOB] -= 0.00000001f;
-#endif // of #ifndef MAKE_KERNEL2_BECOME_STUPID_FOR_TESTS
-}
-
-__global__ void kernel_3_cuda_device(real* veloc,
- real* accel, int size,
- real deltatover2, real* rmass);
-
-extern "C" void kernel_3_cuda_(long* Mesh_pointer,int* size_F, float* deltatover2_F, int* SIMULATION_TYPE_f, float* b_deltatover2) {
- Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper
- int size = *size_F;
- int SIMULATION_TYPE = *SIMULATION_TYPE_f;
- real deltatover2 = *deltatover2_F;
- int blocksize=128;
- int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
- int num_blocks_x = size_padded/blocksize;
- int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
- num_blocks_x = ceil(num_blocks_x/2.0);
- num_blocks_y = num_blocks_y*2;
- }
- dim3 grid(num_blocks_x,num_blocks_y);
- dim3 threads(blocksize,1,1);
-
- kernel_3_cuda_device<<< grid, threads>>>(mp->d_veloc, mp->d_accel, size, deltatover2, mp->d_rmass);
-
- if(SIMULATION_TYPE == 3) {
- kernel_3_cuda_device<<< grid, threads>>>(mp->d_b_veloc, mp->d_b_accel, size, *b_deltatover2,mp->d_rmass);
- }
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
-
- //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
- // sync and check to catch errors from previous async operations
- cudaThreadSynchronize();
- MPI_Barrier(MPI_COMM_WORLD);
- cudaError_t err = cudaGetLastError();
- if (err != cudaSuccess)
- {
- fprintf(stderr,"Error after kernel 3: %s\n", cudaGetErrorString(err));
- exit(1);
- }
-#endif
-}
-
- __global__ void kernel_3_cuda_device(real* veloc,
- real* accel, int size,
- real deltatover2, real* rmass) {
- int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
-
- /* because of block and grid sizing problems, there is a small */
- /* amount of buffer at the end of the calculation */
- if(id < size) {
- accel[3*id] = accel[3*id]*rmass[id];
- accel[3*id+1] = accel[3*id+1]*rmass[id];
- accel[3*id+2] = accel[3*id+2]*rmass[id];
-
- veloc[3*id] = veloc[3*id] + deltatover2*accel[3*id];
- veloc[3*id+1] = veloc[3*id+1] + deltatover2*accel[3*id+1];
- veloc[3*id+2] = veloc[3*id+2] + deltatover2*accel[3*id+2];
- }
- }
-
-void setConst_hprime_xx(float* array)
-{
-
- cudaError_t err = cudaMemcpyToSymbol(d_hprime_xx, array, NGLL2*sizeof(float));
- if (err != cudaSuccess)
- {
- fprintf(stderr, "Error in setConst_hprime_xx: %s\n", cudaGetErrorString(err));
- fprintf(stderr, "The problem is maybe -arch sm_13 instead of -arch sm_11 in the Makefile, please doublecheck\n");
- exit(1);
- }
-}
-
-void setConst_hprimewgll_xx(float* array)
-{
- cudaError_t err = cudaMemcpyToSymbol(d_hprimewgll_xx, array, NGLL2*sizeof(float));
- if (err != cudaSuccess)
- {
- fprintf(stderr, "Error in setConst_hprime_xx: %s\n", cudaGetErrorString(err));
- exit(1);
- }
-}
-
-void setConst_wgllwgll_xy(float* array,Mesh* mp)
-{
- cudaError_t err = cudaMemcpyToSymbol(d_wgllwgll_xy, array, NGLL2*sizeof(float));
- if (err != cudaSuccess)
- {
- fprintf(stderr, "Error in setConst_wgllwgll_xy: %s\n", cudaGetErrorString(err));
- exit(1);
- }
-
- // mp->d_wgllwgll_xy = d_wgllwgll_xy; // this doesn't work, use
- // following cudaGetSymbolAddress
- err = cudaGetSymbolAddress((void**)&(mp->d_wgllwgll_xy),"d_wgllwgll_xy");
- printf("setting up mp->d_wgllwgll_xy\n");
- if(err != cudaSuccess) {
- fprintf(stderr, "Error with d_wgllwgll_xy: %s\n", cudaGetErrorString(err));
- exit(1);
- }
-}
-
-void setConst_wgllwgll_xz(float* array,Mesh* mp)
-{
- cudaError_t err = cudaMemcpyToSymbol(d_wgllwgll_xz, array, NGLL2*sizeof(float));
- if (err != cudaSuccess)
- {
- fprintf(stderr, "Error in setConst_wgllwgll_xz: %s\n", cudaGetErrorString(err));
- exit(1);
- }
- // mp->d_wgllwgll_xz = d_wgllwgll_xz;
- err = cudaGetSymbolAddress((void**)&(mp->d_wgllwgll_xz),"d_wgllwgll_xz");
- if(err != cudaSuccess) {
- fprintf(stderr, "Error with d_wgllwgll_xz: %s\n", cudaGetErrorString(err));
- exit(1);
- }
-}
-
-void setConst_wgllwgll_yz(float* array,Mesh* mp)
-{
- cudaError_t err = cudaMemcpyToSymbol(d_wgllwgll_yz, array, NGLL2*sizeof(float));
- if (err != cudaSuccess)
- {
- fprintf(stderr, "Error in setConst_wgllwgll_yz: %s\n", cudaGetErrorString(err));
- exit(1);
- }
- // mp->d_wgllwgll_yz = d_wgllwgll_yz;
- err = cudaGetSymbolAddress((void**)&(mp->d_wgllwgll_yz),"d_wgllwgll_yz");
- if(err != cudaSuccess) {
- fprintf(stderr, "Error with d_wgllwgll_yz: %s\n", cudaGetErrorString(err));
- exit(1);
- }
-}
Copied: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_kernels.f90 (from rev 19012, seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/compute_kernels.f90)
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_kernels.f90 (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_kernels.f90 2011-10-06 03:31:24 UTC (rev 19027)
@@ -0,0 +1,317 @@
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+ subroutine compute_kernels()
+
+! kernel calculations
+! see e.g. Tromp et al. (2005)
+
+ use specfem_par
+ use specfem_par_elastic
+ use specfem_par_acoustic
+ implicit none
+
+ ! elastic simulations
+ if( ELASTIC_SIMULATION ) then
+ call compute_kernels_el()
+ endif
+
+ ! elastic simulations
+ if( ACOUSTIC_SIMULATION ) then
+ call compute_kernels_ac()
+ endif
+
+ ! computes an approximative hessian for preconditioning kernels
+ if ( APPROXIMATE_HESS_KL ) then
+ call compute_kernels_hessian()
+ endif
+
+ end subroutine compute_kernels
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine compute_kernels_el()
+
+! kernel calculations
+! see e.g. Tromp et al. (2005)
+
+ use specfem_par
+ use specfem_par_elastic
+ use specfem_par_movie,only: nfaces_surface_ext_mesh
+
+ implicit none
+ ! local parameters
+ integer :: i,j,k,ispec,iglob
+ real(kind=CUSTOM_REAL), dimension(5) :: epsilondev_loc,b_epsilondev_loc
+
+ ! updates kernels on GPU
+ if(GPU_MODE) then
+ call compute_kernels_cuda(Mesh_pointer,NOISE_TOMOGRAPHY,ELASTIC_SIMULATION,SAVE_MOHO_MESH, &
+ deltat)
+
+ ! for noise simulations --- source strength kernel
+ if (NOISE_TOMOGRAPHY == 3) &
+ call compute_kernels_strength_noise(NGLLSQUARE*num_free_surface_faces,ibool, &
+ sigma_kl,displ,deltat,it, &
+ normal_x_noise,normal_y_noise,normal_z_noise, &
+ noise_surface_movie, &
+ NSPEC_AB,NGLOB_AB, &
+ num_free_surface_faces,free_surface_ispec,free_surface_ijk,&
+ GPU_MODE,Mesh_pointer)
+
+ ! kernels are done
+ return
+ endif
+
+ ! updates kernels on CPU
+ do ispec = 1, NSPEC_AB
+
+ ! elastic domains
+ if( ispec_is_elastic(ispec) ) then
+
+ do k = 1, NGLLZ
+ do j = 1, NGLLY
+ do i = 1, NGLLX
+ iglob = ibool(i,j,k,ispec)
+
+ ! isotropic kernels
+ ! note: takes displacement from backward/reconstructed (forward) field b_displ
+ ! and acceleration from adjoint field accel (containing adjoint sources)
+ !
+ ! note: : time integral summation uses deltat
+ !
+ ! compare with Tromp et al. (2005), eq. (14), which takes adjoint displacement
+ ! and forward acceleration, that is the symmetric form of what is calculated here
+ ! however, this kernel expression is symmetric with regards
+ ! to interchange adjoint - forward field
+ rho_kl(i,j,k,ispec) = rho_kl(i,j,k,ispec) &
+ + deltat * dot_product(accel(:,iglob), b_displ(:,iglob))
+
+ ! kernel for shear modulus, see e.g. Tromp et al. (2005), equation (17)
+ ! note: multiplication with 2*mu(x) will be done after the time loop
+ epsilondev_loc(1) = epsilondev_xx(i,j,k,ispec)
+ epsilondev_loc(2) = epsilondev_yy(i,j,k,ispec)
+ epsilondev_loc(3) = epsilondev_xy(i,j,k,ispec)
+ epsilondev_loc(4) = epsilondev_xz(i,j,k,ispec)
+ epsilondev_loc(5) = epsilondev_yz(i,j,k,ispec)
+
+ b_epsilondev_loc(1) = b_epsilondev_xx(i,j,k,ispec)
+ b_epsilondev_loc(2) = b_epsilondev_yy(i,j,k,ispec)
+ b_epsilondev_loc(3) = b_epsilondev_xy(i,j,k,ispec)
+ b_epsilondev_loc(4) = b_epsilondev_xz(i,j,k,ispec)
+ b_epsilondev_loc(5) = b_epsilondev_yz(i,j,k,ispec)
+
+ mu_kl(i,j,k,ispec) = mu_kl(i,j,k,ispec) &
+ + deltat * (epsilondev_loc(1)*b_epsilondev_loc(1) + epsilondev_loc(2)*b_epsilondev_loc(2) &
+ + (epsilondev_loc(1)+epsilondev_loc(2)) * (b_epsilondev_loc(1)+b_epsilondev_loc(2)) &
+ + 2 * (epsilondev_loc(3)*b_epsilondev_loc(3) + epsilondev_loc(4)*b_epsilondev_loc(4) + &
+ epsilondev_loc(5)*b_epsilondev_loc(5)) )
+
+ ! kernel for bulk modulus, see e.g. Tromp et al. (2005), equation (18)
+ ! note: multiplication with kappa(x) will be done after the time loop
+ kappa_kl(i,j,k,ispec) = kappa_kl(i,j,k,ispec) &
+ + deltat * (9 * epsilon_trace_over_3(i,j,k,ispec) &
+ * b_epsilon_trace_over_3(i,j,k,ispec))
+
+ enddo
+ enddo
+ enddo
+ endif !ispec_is_elastic
+
+ enddo
+
+ ! moho kernel
+ if( SAVE_MOHO_MESH ) then
+ call compute_boundary_kernel()
+ endif
+
+ ! for noise simulations --- source strength kernel
+ if (NOISE_TOMOGRAPHY == 3) &
+ call compute_kernels_strength_noise(NGLLSQUARE*num_free_surface_faces,ibool, &
+ sigma_kl,displ,deltat,it, &
+ normal_x_noise,normal_y_noise,normal_z_noise, &
+ noise_surface_movie, &
+ NSPEC_AB,NGLOB_AB, &
+ num_free_surface_faces,free_surface_ispec,free_surface_ijk,&
+ GPU_MODE,Mesh_pointer)
+
+ end subroutine compute_kernels_el
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine compute_kernels_ac()
+
+! kernel calculations
+! see e.g. Tromp et al. (2005)
+
+ use specfem_par
+ use specfem_par_acoustic
+
+ implicit none
+ ! local parameters
+ real(kind=CUSTOM_REAL),dimension(NDIM,NGLLX,NGLLY,NGLLZ):: b_displ_elm,accel_elm
+ real(kind=CUSTOM_REAL) :: kappal,rhol
+ integer :: i,j,k,ispec,iglob
+
+ ! updates kernels on GPU
+ if(GPU_MODE) then
+
+ ! computes contribution to density and bulk modulus kernel
+ call compute_kernels_acoustic_cuda(Mesh_pointer,deltat)
+
+ ! kernels are done
+ return
+
+ !daniel
+ !call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
+ ! potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+ !call transfer_b_fields_acoustic_from_device(NGLOB_AB,b_potential_acoustic, &
+ ! b_potential_dot_acoustic, b_potential_dot_dot_acoustic, Mesh_pointer)
+
+ endif
+
+ ! updates kernels
+ do ispec = 1, NSPEC_AB
+
+ ! acoustic domains
+ if( ispec_is_acoustic(ispec) ) then
+
+ ! backward fields: displacement vector
+ call compute_gradient(ispec,NSPEC_ADJOINT,NGLOB_ADJOINT, &
+ b_potential_acoustic, b_displ_elm,&
+ hprime_xx,hprime_yy,hprime_zz, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ ibool,rhostore)
+ ! adjoint fields: acceleration vector
+ call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
+ potential_dot_dot_acoustic, accel_elm,&
+ hprime_xx,hprime_yy,hprime_zz, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ ibool,rhostore)
+
+ do k = 1, NGLLZ
+ do j = 1, NGLLY
+ do i = 1, NGLLX
+ iglob = ibool(i,j,k,ispec)
+
+ ! density kernel
+ rhol = rhostore(i,j,k,ispec)
+ rho_ac_kl(i,j,k,ispec) = rho_ac_kl(i,j,k,ispec) &
+ - deltat * rhol * dot_product(accel_elm(:,i,j,k), b_displ_elm(:,i,j,k))
+
+ ! bulk modulus kernel
+ kappal = kappastore(i,j,k,ispec)
+ kappa_ac_kl(i,j,k,ispec) = kappa_ac_kl(i,j,k,ispec) &
+ - deltat / kappal &
+ * potential_dot_dot_acoustic(iglob) &
+ * b_potential_dot_dot_acoustic(iglob)
+
+ enddo
+ enddo
+ enddo
+ endif ! ispec_is_acoustic
+
+ enddo
+
+ end subroutine compute_kernels_ac
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine compute_kernels_hessian()
+
+ use specfem_par
+ use specfem_par_elastic
+ use specfem_par_acoustic
+
+ implicit none
+ ! local parameters
+ real(kind=CUSTOM_REAL),dimension(NDIM,NGLLX,NGLLY,NGLLZ):: b_accel_elm,accel_elm
+ integer :: i,j,k,ispec,iglob
+
+ ! loops over all elements
+ do ispec = 1, NSPEC_AB
+
+ ! acoustic domains
+ if( ispec_is_acoustic(ispec) ) then
+
+ ! adjoint fields: acceleration vector
+ call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
+ potential_dot_dot_acoustic, accel_elm,&
+ hprime_xx,hprime_yy,hprime_zz, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ ibool,rhostore)
+
+ ! adjoint fields: acceleration vector
+ call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
+ b_potential_dot_dot_acoustic, b_accel_elm,&
+ hprime_xx,hprime_yy,hprime_zz, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ ibool,rhostore)
+
+ do k = 1, NGLLZ
+ do j = 1, NGLLY
+ do i = 1, NGLLX
+ iglob = ibool(i,j,k,ispec)
+
+ ! approximates hessian
+ ! term with adjoint acceleration and backward/reconstructed acceleration
+ hess_ac_kl(i,j,k,ispec) = hess_ac_kl(i,j,k,ispec) &
+ + deltat * dot_product(accel_elm(:,i,j,k), b_accel_elm(:,i,j,k))
+
+ enddo
+ enddo
+ enddo
+ endif
+
+ ! elastic domains
+ if( ispec_is_elastic(ispec) ) then
+ do k = 1, NGLLZ
+ do j = 1, NGLLY
+ do i = 1, NGLLX
+ iglob = ibool(i,j,k,ispec)
+
+ ! approximates hessian
+ ! term with adjoint acceleration and backward/reconstructed acceleration
+ hess_kl(i,j,k,ispec) = hess_kl(i,j,k,ispec) &
+ + deltat * dot_product(accel(:,iglob), b_accel(:,iglob))
+
+ enddo
+ enddo
+ enddo
+ endif
+
+ enddo
+
+ end subroutine compute_kernels_hessian
+
+
Deleted: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_kernels_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_kernels_cuda.cu 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_kernels_cuda.cu 2011-10-06 03:31:24 UTC (rev 19027)
@@ -1,268 +0,0 @@
-#include <stdio.h>
-#include <cuda.h>
-#include <cublas.h>
-#include <mpi.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <sys/time.h>
-#include <sys/resource.h>
-
-#include "mesh_constants_cuda.h"
-#define MAX(x,y) (((x) < (y)) ? (y) : (x))
-void print_CUDA_error_if_any(cudaError_t err, int num);
-
-#define ENABLE_VERY_SLOW_ERROR_CHECKING
-
-#define INDEX2(xsize,x,y) x + (y)*xsize
-#define INDEX3(xsize,ysize,x,y,z) x + (y)*xsize + (z)*xsize*ysize
-#define INDEX4(xsize,ysize,zsize,x,y,z,i) x + (y)*xsize + (z)*xsize*ysize + (i)*xsize*ysize*zsize
-#define INDEX5(xsize,ysize,zsize,isize,x,y,z,i,j) x + (y)*xsize + (z)*xsize*ysize + (i)*xsize*ysize*zsize + (j)*xsize*ysize*zsize*isize
-
-
-__global__ void compute_kernels_cudakernel(int* ispec_is_elastic, int* ibool,
- float* accel,
- float* b_displ,
- float* epsilondev_xx,
- float* epsilondev_yy,
- float* epsilondev_xy,
- float* epsilondev_xz,
- float* epsilondev_yz,
- float* b_epsilondev_xx,
- float* b_epsilondev_yy,
- float* b_epsilondev_xy,
- float* b_epsilondev_xz,
- float* b_epsilondev_yz,
- float* rho_kl,
- float deltat,
- float* mu_kl,
- float* kappa_kl,
- float* epsilon_trace_over_3,
- float* b_epsilon_trace_over_3,
- int NSPEC_AB,
- float* d_debug) {
-
- int ispec = blockIdx.x + blockIdx.y*gridDim.x;
- if(ispec<NSPEC_AB) { // handles case when there is 1 extra block (due to rectangular grid)
- int ijk = threadIdx.x;
- int ijk_ispec = ijk + 125*ispec;
- int iglob = ibool[ijk_ispec]-1;
-
- // if(ispec_is_elastic[ispec]) { // leave out until have acoustic coupling
- if(1) {
-
-
- if(ijk_ispec == 9480531) {
- d_debug[0] = rho_kl[ijk_ispec];
- d_debug[1] = accel[3*iglob];
- d_debug[2] = b_displ[3*iglob];
- d_debug[3] = deltat * (accel[3*iglob]*b_displ[3*iglob]+
- accel[3*iglob+1]*b_displ[3*iglob+1]+
- accel[3*iglob+2]*b_displ[3*iglob+2]);
- }
-
- rho_kl[ijk_ispec] += deltat * (accel[3*iglob]*b_displ[3*iglob]+
- accel[3*iglob+1]*b_displ[3*iglob+1]+
- accel[3*iglob+2]*b_displ[3*iglob+2]);
-
-
-
- // if(rho_kl[ijk_ispec] < 1.9983e+18) {
- // atomicAdd(&d_debug[3],1.0);
- // d_debug[4] = ijk_ispec;
- // d_debug[0] = rho_kl[ijk_ispec];
- // d_debug[1] = accel[3*iglob];
- // d_debug[2] = b_displ[3*iglob];
- // }
-
- mu_kl[ijk_ispec] += deltat * (epsilondev_xx[ijk_ispec]*b_epsilondev_xx[ijk_ispec]+ // 1*b1
- epsilondev_yy[ijk_ispec]*b_epsilondev_yy[ijk_ispec]+ // 2*b2
- (epsilondev_xx[ijk_ispec]+epsilondev_yy[ijk_ispec])*
- (b_epsilondev_xx[ijk_ispec]+b_epsilondev_yy[ijk_ispec])+
- 2*(epsilondev_xy[ijk_ispec]*b_epsilondev_xy[ijk_ispec]+
- epsilondev_xz[ijk_ispec]*b_epsilondev_xz[ijk_ispec]+
- epsilondev_yz[ijk_ispec]*b_epsilondev_yz[ijk_ispec]));
-
- kappa_kl[ijk_ispec] += deltat*(9*epsilon_trace_over_3[ijk_ispec]*
- b_epsilon_trace_over_3[ijk_ispec]);
-
- }
- }
-}
-
-
-
-extern "C" void compute_kernels_cuda_(long* Mesh_pointer, int* NOISE_TOMOGRAPHY,
- int* ELASTIC_SIMULATION, int* SAVE_MOHO_MESH,float* deltat) {
-
- Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
-
- int blocksize = 125; // NGLLX*NGLLY*NGLLZ
- int num_blocks_x = mp->NSPEC_AB;
- int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
- num_blocks_x = ceil(num_blocks_x/2.0);
- num_blocks_y = num_blocks_y*2;
- }
- dim3 grid(num_blocks_x,num_blocks_y);
- dim3 threads(blocksize,1,1);
-
- float* d_debug;
- float* h_debug;
- h_debug = (float*)calloc(128,sizeof(float));
- cudaMalloc((void**)&d_debug,128*sizeof(float));
- cudaMemcpy(d_debug,h_debug,128*sizeof(float),cudaMemcpyHostToDevice);
-
-
- compute_kernels_cudakernel<<<grid,threads>>>(mp->d_ispec_is_elastic,mp->d_ibool,
- mp->d_accel, mp->d_b_displ,
- mp->d_epsilondev_xx,
- mp->d_epsilondev_yy,
- mp->d_epsilondev_xy,
- mp->d_epsilondev_xz,
- mp->d_epsilondev_yz,
- mp->d_b_epsilondev_xx,
- mp->d_b_epsilondev_yy,
- mp->d_b_epsilondev_xy,
- mp->d_b_epsilondev_xz,
- mp->d_b_epsilondev_yz,
- mp->d_rho_kl,
- *deltat,
- mp->d_mu_kl,
- mp->d_kappa_kl,
- mp->d_epsilon_trace_over_3,
- mp->d_b_epsilon_trace_over_3,
- mp->NSPEC_AB,
- d_debug);
-
- cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
- cudaFree(d_debug);
- // for(int i=0;i<5;i++) {
- // printf("d_debug[%d]=%e\n",i,h_debug[i]);
- // }
- free(h_debug);
-
- // float* h_rho = (float*)malloc(sizeof(float)*mp->NSPEC_AB*125);
- // float maxval = 0;
- // cudaMemcpy(h_rho,mp->d_rho_kl,sizeof(float)*mp->NSPEC_AB*125,cudaMemcpyDeviceToHost);
- // int number_big_values = 0;
- // for(int i=0;i<mp->NSPEC_AB*125;i++) {
- // maxval = MAX(maxval,fabsf(h_rho[i]));
- // if(fabsf(h_rho[i]) > 1e10) {
- // number_big_values++;
- // }
- // }
-
- // printf("maval rho = %e, number>1e10 = %d vs. %d\n",maxval,number_big_values,mp->NSPEC_AB*125);
-
- #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- // sync and check to catch errors from previous async operations
- cudaThreadSynchronize();
- cudaError_t err = cudaGetLastError();
- if (err != cudaSuccess)
- {
- fprintf(stderr,"Error launching/running compute_kernels_cudakernel: %s\n", cudaGetErrorString(err));
- exit(1);
- }
-#endif
-
-
-}
-
-extern "C" void transfer_sensitivity_kernels_to_host_(long* Mesh_pointer, float* h_rho_kl,
- float* h_mu_kl, float* h_kappa_kl,
- float* h_Sigma_kl,int* NSPEC_AB,int* NSPEC_AB_VAL) {
-
- Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
-
- print_CUDA_error_if_any(cudaMemcpy(h_rho_kl,mp->d_rho_kl,*NSPEC_AB*125*sizeof(float),
- cudaMemcpyDeviceToHost),1);
- print_CUDA_error_if_any(cudaMemcpy(h_mu_kl,mp->d_mu_kl,*NSPEC_AB*125*sizeof(float),
- cudaMemcpyDeviceToHost),1);
- print_CUDA_error_if_any(cudaMemcpy(h_kappa_kl,mp->d_kappa_kl,*NSPEC_AB*125*sizeof(float),
- cudaMemcpyDeviceToHost),1);
- print_CUDA_error_if_any(cudaMemcpy(h_Sigma_kl,mp->d_Sigma_kl,125*(*NSPEC_AB_VAL)*sizeof(float),
- cudaMemcpyHostToDevice),4);
-
-}
-
-__global__ void compute_kernels_strength_noise_cuda_kernel(float* displ, int* free_surface_ispec,int* free_surface_ijk, int* ibool, float* noise_surface_movie, float* normal_x_noise, float* normal_y_noise, float* normal_z_noise, float* Sigma_kl, float deltat,int num_free_surface_faces, float* d_debug) {
- int iface = blockIdx.x + blockIdx.y*gridDim.x;
- if(iface<num_free_surface_faces) {
-
- int ispec = free_surface_ispec[iface]-1;
- int igll = threadIdx.x;
- int ipoin = igll + 25*iface;
- int i = free_surface_ijk[INDEX3(3,25,0,igll,iface)]-1;
- int j = free_surface_ijk[INDEX3(3,25,0,igll,iface)]-1;
- int k = free_surface_ijk[INDEX3(3,25,0,igll,iface)]-1;
-
- int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
-
- float eta = (noise_surface_movie[INDEX3(3,25,0,igll,iface)]*normal_x_noise[ipoin]+
- noise_surface_movie[INDEX3(3,25,1,igll,iface)]*normal_y_noise[ipoin]+
- noise_surface_movie[INDEX3(3,25,2,igll,iface)]*normal_z_noise[ipoin]);
-
- // if(ijk_ispec == 78496) {
- // d_debug[0] = Sigma_kl[ijk_ispec];
- // d_debug[1] = eta;
- // d_debug[2] = normal_x_noise[ipoin];
- // d_debug[3] = normal_y_noise[ipoin];
- // d_debug[4] = normal_z_noise[ipoin];
- // d_debug[5] = displ[3*iglob+2];
- // d_debug[6] = deltat*eta*normal_z_noise[ipoin]*displ[2+3*iglob];
- // d_debug[7] = 0.008*1.000000e-24*normal_z_noise[ipoin]*3.740546e-13;
- // }
-
- Sigma_kl[INDEX4(5,5,5,i,j,k,ispec)] += deltat*eta*(normal_x_noise[ipoin]*displ[3*iglob]+
- normal_y_noise[ipoin]*displ[1+3*iglob]+
- normal_z_noise[ipoin]*displ[2+3*iglob]);
- }
-
-
-}
-
-extern "C" void compute_kernels_strength_noise_cuda_(long* Mesh_pointer, float* h_noise_surface_movie,
- int* num_free_surface_faces_f,int* deltat) {
- Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
- int num_free_surface_faces = *num_free_surface_faces_f;
-
- cudaMemcpy(mp->d_noise_surface_movie,h_noise_surface_movie,3*25*num_free_surface_faces*sizeof(float),cudaMemcpyHostToDevice);
-
-
- int num_blocks_x = num_free_surface_faces;
- int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
- num_blocks_x = ceil(num_blocks_x/2.0);
- num_blocks_y = num_blocks_y*2;
- }
-
- dim3 grid(num_blocks_x,num_blocks_y);
- dim3 threads(25,1,1);
-
- // float* h_debug = (float*)calloc(128,sizeof(float));
- float* d_debug;
- // cudaMalloc((void**)&d_debug,128*sizeof(float));
- // cudaMemcpy(d_debug,h_debug,128*sizeof(float),cudaMemcpyHostToDevice);
-
- compute_kernels_strength_noise_cuda_kernel<<<grid,threads>>>(mp->d_displ,
- mp->d_free_surface_ispec,
- mp->d_free_surface_ijk,
- mp->d_ibool,
- mp->d_noise_surface_movie,
- mp->d_normal_x_noise,
- mp->d_normal_y_noise,
- mp->d_normal_z_noise,
- mp->d_Sigma_kl,*deltat,
- num_free_surface_faces,
- d_debug);
-
- // cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
- // for(int i=0;i<8;i++) {
- // printf("debug[%d]= %e\n",i,h_debug[i]);
- // }
-
- #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("compute_kernels_strength_noise_cuda_kernel");
- #endif
-
-}
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_stacey_acoustic.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_stacey_acoustic.f90 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_stacey_acoustic.f90 2011-10-06 03:31:24 UTC (rev 19027)
@@ -33,7 +33,8 @@
num_abs_boundary_faces,rhostore,kappastore,ispec_is_acoustic,&
SIMULATION_TYPE,SAVE_FORWARD,NSTEP,it,NGLOB_ADJOINT, &
b_potential_dot_dot_acoustic,b_reclen_potential, &
- b_absorb_potential,b_num_abs_boundary_faces)
+ b_absorb_potential,b_num_abs_boundary_faces, &
+ GPU_MODE,Mesh_pointer)
implicit none
@@ -67,6 +68,10 @@
real(kind=CUSTOM_REAL),dimension(NGLLSQUARE,b_num_abs_boundary_faces):: b_absorb_potential
logical:: SAVE_FORWARD
+ ! GPU_MODE variables
+ integer(kind=8) :: Mesh_pointer
+ logical :: GPU_MODE
+
! local parameters
real(kind=CUSTOM_REAL) :: rhol,cpl,jacobianw,absorbl
integer :: ispec,iglob,i,j,k,iface,igll
@@ -87,54 +92,63 @@
endif !adjoint
! absorbs absorbing-boundary surface using Sommerfeld condition (vanishing field in the outer-space)
- do iface=1,num_abs_boundary_faces
+ if( .NOT. GPU_MODE ) then
+ ! on CPU
+ do iface=1,num_abs_boundary_faces
- ispec = abs_boundary_ispec(iface)
+ ispec = abs_boundary_ispec(iface)
- if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
+ if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
- if( ispec_is_acoustic(ispec) ) then
+ if( ispec_is_acoustic(ispec) ) then
- ! reference gll points on boundary face
- do igll = 1,NGLLSQUARE
+ ! reference gll points on boundary face
+ do igll = 1,NGLLSQUARE
- ! gets local indices for GLL point
- i = abs_boundary_ijk(1,igll,iface)
- j = abs_boundary_ijk(2,igll,iface)
- k = abs_boundary_ijk(3,igll,iface)
+ ! gets local indices for GLL point
+ i = abs_boundary_ijk(1,igll,iface)
+ j = abs_boundary_ijk(2,igll,iface)
+ k = abs_boundary_ijk(3,igll,iface)
- ! gets global index
- iglob=ibool(i,j,k,ispec)
+ ! gets global index
+ iglob=ibool(i,j,k,ispec)
- ! determines bulk sound speed
- rhol = rhostore(i,j,k,ispec)
- cpl = sqrt( kappastore(i,j,k,ispec) / rhol )
+ ! determines bulk sound speed
+ rhol = rhostore(i,j,k,ispec)
+ cpl = sqrt( kappastore(i,j,k,ispec) / rhol )
- ! gets associated, weighted jacobian
- jacobianw = abs_boundary_jacobian2Dw(igll,iface)
+ ! gets associated, weighted jacobian
+ jacobianw = abs_boundary_jacobian2Dw(igll,iface)
- ! Sommerfeld condition
- absorbl = potential_dot_acoustic(iglob) * jacobianw / cpl / rhol
- potential_dot_dot_acoustic(iglob) = potential_dot_dot_acoustic(iglob) &
- - absorbl
+ ! Sommerfeld condition
+ absorbl = potential_dot_acoustic(iglob) * jacobianw / cpl / rhol
+ potential_dot_dot_acoustic(iglob) = potential_dot_dot_acoustic(iglob) &
+ - absorbl
- ! adjoint simulations
- if (SIMULATION_TYPE == 3) then
- b_potential_dot_dot_acoustic(iglob) = b_potential_dot_dot_acoustic(iglob) &
- - b_absorb_potential(igll,iface)
+ ! adjoint simulations
+ if (SIMULATION_TYPE == 3) then
+ b_potential_dot_dot_acoustic(iglob) = b_potential_dot_dot_acoustic(iglob) &
+ - b_absorb_potential(igll,iface)
- else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
- b_absorb_potential(igll,iface) = absorbl
+ else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
+ b_absorb_potential(igll,iface) = absorbl
- endif !adjoint
+ endif !adjoint
- enddo
+ enddo
- endif ! ispec_is_acoustic
- endif ! ispec_is_inner
- enddo ! num_abs_boundary_faces
+ endif ! ispec_is_acoustic
+ endif ! ispec_is_inner
+ enddo ! num_abs_boundary_faces
+ else
+ ! GPU_MODE == .true.
+ call compute_stacey_acoustic_cuda(Mesh_pointer, phase_is_inner, &
+ num_abs_boundary_faces, SIMULATION_TYPE, &
+ SAVE_FORWARD,b_absorb_potential)
+ endif
+
! adjoint simulations: stores absorbed wavefield part
if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. num_abs_boundary_faces > 0 ) then
! writes out absorbing boundary value only when second phase is running
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_stacey_elastic.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_stacey_elastic.f90 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_stacey_elastic.f90 2011-10-06 03:31:24 UTC (rev 19027)
@@ -36,7 +36,8 @@
veloc,rho_vp,rho_vs, &
ispec_is_elastic,SIMULATION_TYPE,SAVE_FORWARD, &
NSTEP,it,NGLOB_ADJOINT,b_accel, &
- b_num_abs_boundary_faces,b_reclen_field,b_absorb_field,GPU_MODE,Mesh_pointer)
+ b_num_abs_boundary_faces,b_reclen_field,b_absorb_field, &
+ GPU_MODE,Mesh_pointer)
implicit none
@@ -159,10 +160,12 @@
endif ! ispec_is_inner
enddo
- else ! GPU_MODE == .true.
- call compute_stacey_elastic_cuda(Mesh_pointer, NSPEC_AB, NGLOB_AB, phase_is_inner,&
+ else
+ ! GPU_MODE == .true.
+ call compute_stacey_elastic_cuda(Mesh_pointer, NSPEC_AB, NGLOB_AB, phase_is_inner,&
num_abs_boundary_faces, SIMULATION_TYPE, NSTEP, NGLOB_ADJOINT,&
- b_num_abs_boundary_faces, b_reclen_field,b_absorb_field, SAVE_FORWARD, NGLLSQUARE,it)
+ b_num_abs_boundary_faces, b_reclen_field,b_absorb_field, &
+ SAVE_FORWARD,it)
endif
! adjoint simulations: stores absorbed wavefield part
Deleted: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_stacey_elastic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_stacey_elastic_cuda.cu 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_stacey_elastic_cuda.cu 2011-10-06 03:31:24 UTC (rev 19027)
@@ -1,190 +0,0 @@
-#include <stdio.h>
-#include <cuda.h>
-#include <cublas.h>
-#include <mpi.h>
-
-#include <sys/time.h>
-#include <sys/resource.h>
-
-#include "mesh_constants_cuda.h"
-
-typedef float real; //type of variables passed into function
-typedef float realw; //type of "working" variables
-
-#define MAXDEBUG 1
-#define ENABLE_VERY_SLOW_ERROR_CHECKING
-
-#if MAXDEBUG == 1
-#define LOG(x) printf("%s\n",x)
-#define PRINT5(var,offset) for(;print_count<5;print_count++) printf("var(%d)=%2.20f\n",print_count,var[offset+print_count]);
-#define PRINT10(var) if(print_count<10) { printf("var=%1.20e\n",var); print_count++; }
-#define PRINT10i(var) if(print_count<10) { printf("var=%d\n",var); print_count++; }
-#else
-#define LOG(x) // printf("%s\n",x);
-#define PRINT5(var,offset) // for(i=0;i<10;i++) printf("var(%d)=%f\n",i,var[offset+i]);
-#endif
-
-#define INDEX2(xsize,x,y) x + (y)*xsize
-#define INDEX3(xsize,ysize,x,y,z) x + (y)*xsize + (z)*xsize*ysize
-#define INDEX4(xsize,ysize,zsize,x,y,z,i) x + (y)*xsize + (z)*xsize*ysize + (i)*xsize*ysize*zsize
-#define INDEX5(xsize,ysize,zsize,isize,x,y,z,i,j) x + (y)*xsize + (z)*xsize*ysize + (i)*xsize*ysize*zsize + (j)*xsize*ysize*zsize*isize
-
-#define NDIM 3
-#define NGLLX 5
-
-__global__ void compute_stacey_elastic_kernel(real* veloc, real* accel, real* b_accel, int* abs_boundary_ispec,
- int* abs_boundary_ijk, int* ibool,
- real* abs_boundary_normal,
- real* rho_vp, real* rho_vs,
- real* abs_boundary_jacobian2Dw,
- real* b_absorb_field,int NGLLSQUARE,
- int* ispec_is_inner, int* ispec_is_elastic,
- int phase_is_inner,float* debug_val,int* debug_val_int,
- int num_abs_boundary_faces,
- int SAVE_FORWARD,int SIMULATION_TYPE) {
-
- int igll = threadIdx.x; // tx
- int iface = blockIdx.x + gridDim.x*blockIdx.y; // bx
- int i;
- int j;
- int k;
- int iglob;
- int ispec;
- realw vx,vy,vz,vn;
- realw nx,ny,nz;
- realw rho_vp_temp,rho_vs_temp;
- realw tx,ty,tz;
- realw jacobianw;
- // don't compute points outside NGLLSQUARE=25
-
-
-
- if(igll<NGLLSQUARE && iface < num_abs_boundary_faces) {
-
- // "-1" from index values to convert from Fortran-> C indexing
- ispec = abs_boundary_ispec[iface]-1;
- i = abs_boundary_ijk[INDEX3(NDIM,NGLLSQUARE,0,igll,iface)]-1;
- j = abs_boundary_ijk[INDEX3(NDIM,NGLLSQUARE,1,igll,iface)]-1;
- k = abs_boundary_ijk[INDEX3(NDIM,NGLLSQUARE,2,igll,iface)]-1;
- iglob = ibool[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)]-1;
-
- if(ispec_is_inner[ispec] == phase_is_inner && ispec_is_elastic[ispec]==1) {
-
- i = abs_boundary_ijk[INDEX3(NDIM,NGLLSQUARE,0,igll,iface)]-1;
- j = abs_boundary_ijk[INDEX3(NDIM,NGLLSQUARE,1,igll,iface)]-1;
- k = abs_boundary_ijk[INDEX3(NDIM,NGLLSQUARE,2,igll,iface)]-1;
- iglob = ibool[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)]-1;
-
- // gets associated velocity
-
- vx = veloc[iglob*3+0];
- vy = veloc[iglob*3+1];
- vz = veloc[iglob*3+2];
-
- // gets associated normal
- nx = abs_boundary_normal[INDEX3(NDIM,NGLLSQUARE,0,igll,iface)];
- ny = abs_boundary_normal[INDEX3(NDIM,NGLLSQUARE,1,igll,iface)];
- nz = abs_boundary_normal[INDEX3(NDIM,NGLLSQUARE,2,igll,iface)];
-
- // // velocity component in normal direction (normal points out of element)
- vn = vx*nx + vy*ny + vz*nz;
- rho_vp_temp = rho_vp[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)];
- rho_vs_temp = rho_vs[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)];
- tx = rho_vp_temp*vn*nx + rho_vs_temp*(vx-vn*nx);
- ty = rho_vp_temp*vn*ny + rho_vs_temp*(vy-vn*ny);
- tz = rho_vp_temp*vn*nz + rho_vs_temp*(vz-vn*nz);
-
- jacobianw = abs_boundary_jacobian2Dw[INDEX2(NGLLSQUARE,igll,iface)];
-
- atomicAdd(&accel[iglob*3],-tx*jacobianw);
- atomicAdd(&accel[iglob*3+1],-ty*jacobianw);
- atomicAdd(&accel[iglob*3+2],-tz*jacobianw);
-
- if(SIMULATION_TYPE == 3) {
- atomicAdd(&b_accel[iglob*3 ],-b_absorb_field[0+3*(igll+25*(iface))]);
- atomicAdd(&b_accel[iglob*3+1],-b_absorb_field[1+3*(igll+25*(iface))]);
- atomicAdd(&b_accel[iglob*3+2],-b_absorb_field[2+3*(igll+25*(iface))]);
- }
- else if(SAVE_FORWARD && SIMULATION_TYPE == 1) {
- b_absorb_field[0+3*(igll+25*(iface))] = tx*jacobianw;
- b_absorb_field[1+3*(igll+25*(iface))] = ty*jacobianw;
- b_absorb_field[2+3*(igll+25*(iface))] = tz*jacobianw;
- }
-
- }
- }
-
-}
-
-#define FC_FUNC(name,NAME) name ## _
-#define FC_FUNC_(name,NAME) name ## _
-
-extern "C" void
-FC_FUNC_(write_abs,WRITE_ABS)(int *fid, char *buffer, int *length , int *index);
-extern "C" void
-FC_FUNC_(read_abs,READ_ABS)(int *fid, char *buffer, int *length , int *index);
-
-extern "C" void compute_stacey_elastic_cuda_(long* Mesh_pointer_f, int* NSPEC_ABf, int* NGLOB_ABf, int* phase_is_innerf, int* num_abs_boundary_facesf, int* SIMULATION_TYPEf, int* NSTEPf, int* NGLOB_ADJOINTf, int* b_num_abs_boundary_facesf, int* b_reclen_fieldf,float* b_absorb_field, int* SAVE_FORWARDf, int* NGLLSQUAREf,int* itf) {
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
- int fid = 0;
- int it = *itf;
- int NSPEC_AB = *NSPEC_ABf;
- int NGLOB_AB = *NGLOB_ABf;
- int NGLLSQUARE = *NGLLSQUAREf;
- int phase_is_inner = *phase_is_innerf;
- int num_abs_boundary_faces = *num_abs_boundary_facesf;
- int SIMULATION_TYPE = *SIMULATION_TYPEf;
- int NSTEP = *NSTEPf;
- int myrank; MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
- int NGLOB_ADJOINT = *NGLOB_ADJOINTf;
- int b_num_abs_boundary_faces = *b_num_abs_boundary_facesf;
- int b_reclen_field = *b_reclen_fieldf;
- int SAVE_FORWARD = *SAVE_FORWARDf;
-
- int blocksize = 32; // > NGLLSQUARE=25, but we handle this inside kernel
- int num_blocks_x = num_abs_boundary_faces;
- int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
- num_blocks_x = ceil(num_blocks_x/2.0);
- num_blocks_y = num_blocks_y*2;
- }
-
- dim3 grid(num_blocks_x,num_blocks_y);
- dim3 threads(blocksize,1,1);
-
- float* d_debug_val;
- int* d_debug_val_int;
-
- if(SIMULATION_TYPE == 3 && num_abs_boundary_faces > 0) {
- // int val = NSTEP-it+1;
- // read_abs_(&fid,(char*)b_absorb_field,&b_reclen_field,&val);
- // The read is done in fortran
- cudaMemcpy(mp->d_b_absorb_field,b_absorb_field,b_reclen_field,cudaMemcpyHostToDevice);
- }
-
- #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("between cudamemcpy and compute_stacey_elastic_kernel");
- #endif
-
- compute_stacey_elastic_kernel<<<grid,threads>>>(mp->d_veloc,mp->d_accel,mp->d_b_accel,mp->d_abs_boundary_ispec, mp->d_abs_boundary_ijk, mp->d_ibool, mp->d_abs_boundary_normal, mp->d_rho_vp, mp->d_rho_vs, mp->d_abs_boundary_jacobian2Dw, mp->d_b_absorb_field,NGLLSQUARE,mp->d_ispec_is_inner, mp->d_ispec_is_elastic, phase_is_inner,d_debug_val,d_debug_val_int,num_abs_boundary_faces,SAVE_FORWARD,SIMULATION_TYPE);
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("compute_stacey_elastic_kernel");
-#endif
-
- // ! adjoint simulations: stores absorbed wavefield part
- // if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. num_abs_boundary_faces > 0 ) &
- // write(IOABS,rec=it) b_reclen_field,b_absorb_field,b_reclen_field
-
- if(SIMULATION_TYPE==1 && SAVE_FORWARD && num_abs_boundary_faces>0) {
- cudaMemcpy(b_absorb_field,mp->d_b_absorb_field,b_reclen_field,cudaMemcpyDeviceToHost);
- // The write is done in fortran
- // write_abs_(&fid,(char*)b_absorb_field,&b_reclen_field,&it);
- }
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("after compute_stacey_elastic after cudamemcpy");
-#endif
-}
-
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/finalize_simulation.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/finalize_simulation.f90 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/finalize_simulation.f90 2011-10-06 03:31:24 UTC (rev 19027)
@@ -37,16 +37,7 @@
integer :: irec_local
! save last frame
-
- if(GPU_MODE) then
- call transfer_fields_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
- if(SIMULATION_TYPE==3) then
- call transfer_b_fields_from_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel, Mesh_pointer)
- call transfer_sensitivity_kernels_to_host(Mesh_pointer, rho_kl, mu_kl, kappa_kl,Sigma_kl,&
- NSPEC_AB)
- endif
- endif
-
+
if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
open(unit=27,file=prname(1:len_trim(prname))//'save_forward_arrays.bin',&
status='unknown',form='unformatted')
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/initialize_simulation.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/initialize_simulation.f90 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/initialize_simulation.f90 2011-10-06 03:31:24 UTC (rev 19027)
@@ -205,6 +205,11 @@
stop 'Deville et al. (2002) routines can only be used if NGLLX = NGLLY = NGLLZ is in [5-10]'
endif
+ if( GPU_MODE ) then
+ if( NGLLX /= 5 .or. NGLLY /= 5 .or. NGLLZ /= 5 ) &
+ stop 'GPU mode can only be used if NGLLX == NGLLY == NGLLZ == 5'
+ endif
+
! absorbing surfaces
if( ABSORBING_CONDITIONS ) then
! for arbitrary orientation of elements, which face belongs to xmin,xmax,etc... -
Deleted: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/it_update_displacement_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/it_update_displacement_cuda.cu 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/it_update_displacement_cuda.cu 2011-10-06 03:31:24 UTC (rev 19027)
@@ -1,114 +0,0 @@
-#include <stdio.h>
-#include <cuda.h>
-#include <cublas.h>
-#include <mpi.h>
-
-#include "mesh_constants_cuda.h"
-
-#define ENABLE_VERY_SLOW_ERROR_CHECKING
-
-
-// typedef struct mesh_ {
-
-// int NGLLX; int NSPEC_AB;
-// int NGLOB_AB;
-// float* d_xix; float* d_xiy; float* d_xiz;
-// float* d_etax; float* d_etay; float* d_etaz;
-// float* d_gammax; float* d_gammay; float* d_gammaz;
-// float* d_kappav; float* d_muv;
-// int* d_ibool;
-// float* d_displ; float* d_veloc; float* d_accel;
-// int* d_phase_ispec_inner_elastic;
-// int d_num_phase_ispec_elastic;
-// float* d_rmass;
-
-// } Mesh;
-
-typedef float real;
-
-
- __global__ void UpdateDispVeloc_kernel(real* displ, real* veloc,
- real* accel, int size,
- real deltat, real deltatsqover2, real deltatover2) {
- int id = threadIdx.x + blockIdx.x*blockDim.x + blockIdx.y*gridDim.x*blockDim.x;
-
- /* because of block and grid sizing problems, there is a small */
- /* amount of buffer at the end of the calculation */
- if(id < size) {
- displ[id] = displ[id] + deltat*veloc[id] + deltatsqover2*accel[id];
- veloc[id] = veloc[id] + deltatover2*accel[id];
- accel[id] = 0; // can do this using memset...not sure if faster
- }
- }
-#define CUBLAS_ERROR(s,n) if (s != CUBLAS_STATUS_SUCCESS) { \
- fprintf (stderr, "CUBLAS Memory Write Error @ %d\n",n); \
- exit(EXIT_FAILURE); }
-
-
-extern "C" void it_update_displacement_scheme_cuda_(long* Mesh_pointer_f,int* size_F, float* deltat_F, float* deltatsqover2_F, float* deltatover2_F,int* SIMULATION_TYPE, float* b_deltat_F, float* b_deltatsqover2_F, float* b_deltatover2_F) {
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
-
- int i,device;
-
- int size = *size_F;
- real deltat = *deltat_F;
- real deltatsqover2 = *deltatsqover2_F;
- real deltatover2 = *deltatover2_F;
- real b_deltat = *b_deltat_F;
- real b_deltatsqover2 = *b_deltatsqover2_F;
- real b_deltatover2 = *b_deltatover2_F;
- cublasStatus status;
-
- int blocksize = 128;
- int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
-
- int num_blocks_x = size_padded/blocksize;
- int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
- num_blocks_x = ceil(num_blocks_x/2.0);
- num_blocks_y = num_blocks_y*2;
- }
-
- dim3 grid(num_blocks_x,num_blocks_y);
- dim3 threads(blocksize,1,1);
-
-
- exit_on_cuda_error("Before UpdateDispVeloc_kernel");
-
- //launch kernel
- UpdateDispVeloc_kernel<<<grid,threads>>>(mp->d_displ,mp->d_veloc,mp->d_accel,
- size,deltat,deltatsqover2,deltatover2);
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
- // sync and check to catch errors from previous async operations
- exit_on_cuda_error("UpdateDispVeloc_kernel");
-#endif
-
-
- // kernel for backward fields
- if(*SIMULATION_TYPE == 3) {
-
- UpdateDispVeloc_kernel<<<grid,threads>>>(mp->d_b_displ,mp->d_b_veloc,mp->d_b_accel,
- size,b_deltat, b_deltatsqover2, b_deltatover2);
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- //printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
- // sync and check to catch errors from previous async operations
- cudaThreadSynchronize();
- MPI_Barrier(MPI_COMM_WORLD);
- cudaError_t err = cudaGetLastError();
- if (err != cudaSuccess)
- {
- fprintf(stderr,"Error after SIM_TYPE==3 UpdateDispVeloc_kernel: %s\n", cudaGetErrorString(err));
- exit(1);
- }
-#endif
-
- }
-
- cudaThreadSynchronize();
-
-}
-
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/iterate_time.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/iterate_time.f90 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/iterate_time.f90 2011-10-06 03:31:24 UTC (rev 19027)
@@ -125,6 +125,9 @@
!
enddo ! end of main time loop
+ ! Transfer fields from GPU card to host for further analysis
+ if(GPU_MODE) call it_transfer_from_GPU()
+
end subroutine iterate_time
@@ -147,9 +150,15 @@
ihours_total,iminutes_total,iseconds_total,int_t_total
if(GPU_MODE) then
- call transfer_fields_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
- if(SIMULATION_TYPE==3) &
- call transfer_b_fields_from_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel, Mesh_pointer)
+ ! way 1: copy whole fields
+ ! elastic wavefield
+ if( ELASTIC_SIMULATION ) then
+ call transfer_fields_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
+ ! backward/reconstructed wavefield
+ if(SIMULATION_TYPE==3) &
+ call transfer_b_fields_from_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel, Mesh_pointer)
+ endif
+
endif
! compute maximum of norm of displacement in each slice
@@ -157,7 +166,12 @@
Usolidnorm = maxval(sqrt(displ(1,:)**2 + displ(2,:)**2 + displ(3,:)**2))
else
if( ACOUSTIC_SIMULATION ) then
- Usolidnorm = maxval(abs(potential_dot_dot_acoustic(:)))
+ if(GPU_MODE) then
+ ! way 2: just get maximum of field from GPU
+ call get_norm_acoustic_from_device_cuda(Usolidnorm,Mesh_pointer,1)
+ else
+ Usolidnorm = maxval(abs(potential_dot_dot_acoustic(:)))
+ endif
endif
endif
@@ -170,7 +184,12 @@
b_Usolidnorm = maxval(sqrt(b_displ(1,:)**2 + b_displ(2,:)**2 + b_displ(3,:)**2))
else
if( ACOUSTIC_SIMULATION ) then
- b_Usolidnorm = maxval(abs(b_potential_dot_dot_acoustic(:)))
+ ! way 2
+ if(GPU_MODE) then
+ call get_norm_acoustic_from_device_cuda(b_Usolidnorm,Mesh_pointer,3)
+ else
+ b_Usolidnorm = maxval(abs(b_potential_dot_dot_acoustic(:)))
+ endif
endif
endif
call max_all_cr(b_Usolidnorm,b_Usolidnorm_all)
@@ -306,15 +325,27 @@
! updates acoustic potentials
if( ACOUSTIC_SIMULATION ) then
- potential_acoustic(:) = potential_acoustic(:) &
+
+ if(.NOT. GPU_MODE) then
+ potential_acoustic(:) = potential_acoustic(:) &
+ deltat * potential_dot_acoustic(:) &
+ deltatsqover2 * potential_dot_dot_acoustic(:)
- potential_dot_acoustic(:) = potential_dot_acoustic(:) &
+ potential_dot_acoustic(:) = potential_dot_acoustic(:) &
+ deltatover2 * potential_dot_dot_acoustic(:)
- potential_dot_dot_acoustic(:) = 0._CUSTOM_REAL
+ potential_dot_dot_acoustic(:) = 0._CUSTOM_REAL
+ else
+ ! on GPU
+ call it_update_displacement_scheme_acoustic_cuda(Mesh_pointer, NGLOB_AB, &
+ deltat, deltatsqover2, deltatover2, &
+ SIMULATION_TYPE, b_deltat, b_deltatsqover2, b_deltatover2)
+ endif
! time marching potentials
- if(PML) call PML_acoustic_time_march(NSPEC_AB,NGLOB_AB,ibool,&
+ if(PML) then
+ if( GPU_MODE ) call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+
+ call PML_acoustic_time_march(NSPEC_AB,NGLOB_AB,ibool,&
potential_acoustic,potential_dot_acoustic,&
deltat,deltatsqover2,deltatover2,&
num_PML_ispec,PML_ispec,PML_damping_d,&
@@ -326,10 +357,16 @@
nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
my_neighbours_ext_mesh,NPROC,&
ispec_is_acoustic)
- endif
+ if( GPU_MODE ) call transfer_fields_acoustic_to_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+ endif
+
+ endif ! ACOUSTIC_SIMULATION
+
! updates elastic displacement and velocity
if( ELASTIC_SIMULATION ) then
+
if(.NOT. GPU_MODE) then
displ(:,:) = displ(:,:) + deltat*veloc(:,:) + deltatsqover2*accel(:,:)
veloc(:,:) = veloc(:,:) + deltatover2*accel(:,:)
@@ -352,6 +389,7 @@
+ b_deltatover2 * b_potential_dot_dot_acoustic(:)
b_potential_dot_dot_acoustic(:) = 0._CUSTOM_REAL
endif
+
! elastic backward fields
if( ELASTIC_SIMULATION ) then
b_displ(:,:) = b_displ(:,:) + b_deltat*b_veloc(:,:) + b_deltatsqover2*b_accel(:,:)
@@ -399,6 +437,11 @@
read(27) b_potential_acoustic
read(27) b_potential_dot_acoustic
read(27) b_potential_dot_dot_acoustic
+
+ ! transfers fields onto GPU
+ if(GPU_MODE) &
+ call transfer_b_fields_acoustic_to_device(NGLOB_AB,b_potential_acoustic, &
+ b_potential_dot_acoustic, b_potential_dot_dot_acoustic, Mesh_pointer)
endif
! elastic wavefields
@@ -407,25 +450,33 @@
read(27) b_veloc
read(27) b_accel
+ ! puts elastic wavefield to GPU
+ if(GPU_MODE) &
+ call transfer_b_fields_to_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel,Mesh_pointer)
+
! memory variables if attenuation
if( ATTENUATION ) then
- read(27) b_R_xx
- read(27) b_R_yy
- read(27) b_R_xy
- read(27) b_R_xz
- read(27) b_R_yz
- read(27) b_epsilondev_xx
- read(27) b_epsilondev_yy
- read(27) b_epsilondev_xy
- read(27) b_epsilondev_xz
- read(27) b_epsilondev_yz
+ read(27) b_R_xx
+ read(27) b_R_yy
+ read(27) b_R_xy
+ read(27) b_R_xz
+ read(27) b_R_yz
+ read(27) b_epsilondev_xx
+ read(27) b_epsilondev_yy
+ read(27) b_epsilondev_xy
+ read(27) b_epsilondev_xz
+ read(27) b_epsilondev_yz
+
+ ! puts elastic attenuation arrays to GPU
+ ! daniel: TODO transfer R_xx,... and epsilondev_xx,... as well
+ if(GPU_MODE) &
+ call exit_MPI(myrank,'read forward arrays: not fully implemented yet for elastic domains with attenuation')
+
endif
endif
close(27)
- if(GPU_MODE) &
- call transfer_b_fields_to_device(NDIM*NGLOB_AB,b_displ,b_veloc, b_accel,Mesh_pointer)
end subroutine it_read_foward_arrays
@@ -455,6 +506,7 @@
open(unit=27,file=trim(prname_Q)//trim(outputname),status='old',&
action='read',form='unformatted')
if( ELASTIC_SIMULATION ) then
+ ! reads arrays from disk files
read(27) b_displ
read(27) b_veloc
read(27) b_accel
@@ -468,11 +520,27 @@
read(27) b_epsilondev_xy
read(27) b_epsilondev_xz
read(27) b_epsilondev_yz
+
+ ! puts elastic fields onto GPU
+ if(GPU_MODE) then
+ call transfer_b_fields_to_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel, Mesh_pointer)
+
+ ! daniel: TODO transfer R_xx,... and epsilondev_xx,... as well
+ call exit_MPI(myrank,'store attenuation arrays: not fully implemented yet for elastic domains')
+ endif
endif
+
if( ACOUSTIC_SIMULATION ) then
+ ! reads arrays from disk files
read(27) b_potential_acoustic
read(27) b_potential_dot_acoustic
read(27) b_potential_dot_dot_acoustic
+
+ ! puts acoustic fields onto GPU
+ if(GPU_MODE) &
+ call transfer_b_fields_acoustic_to_device(NGLOB_AB,b_potential_acoustic, &
+ b_potential_dot_acoustic, b_potential_dot_dot_acoustic, Mesh_pointer)
+
endif
close(27)
else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. mod(it,NSTEP_Q_SAVE) == 0) then
@@ -481,9 +549,19 @@
open(unit=27,file=trim(prname_Q)//trim(outputname),status='unknown',&
action='write',form='unformatted')
if( ELASTIC_SIMULATION ) then
+ ! gets elastic fields from GPU onto CPU
+ if(GPU_MODE) then
+ call transfer_fields_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
+
+ ! daniel: TODO transfer R_xx,... and epsilondev_xx,... as well
+ call exit_MPI(myrank,'store attenuation arrays: not fully implemented yet for elastic domains')
+ endif
+
+ ! writes to disk file
write(27) displ
write(27) veloc
write(27) accel
+
write(27) R_xx
write(27) R_yy
write(27) R_xy
@@ -496,6 +574,12 @@
write(27) epsilondev_yz
endif
if( ACOUSTIC_SIMULATION ) then
+ ! gets acoustic fields from GPU onto CPU
+ if(GPU_MODE) &
+ call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+
+ ! writes to disk file
write(27) potential_acoustic
write(27) potential_dot_acoustic
write(27) potential_dot_dot_acoustic
@@ -505,3 +589,39 @@
endif ! it
end subroutine it_store_attenuation_arrays
+
+
+!=====================================================================
+
+ subroutine it_transfer_from_GPU()
+
+! transfers fields on GPU back onto CPU
+
+ use specfem_par
+ use specfem_par_elastic
+ use specfem_par_acoustic
+
+ implicit none
+
+ ! acoustic potentials
+ if( ACOUSTIC_SIMULATION ) then
+ call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+ if( SIMULATION_TYPE == 3 ) then
+ call transfer_b_fields_acoustic_from_device(NGLOB_AB,b_potential_acoustic, &
+ b_potential_dot_acoustic, b_potential_dot_dot_acoustic, Mesh_pointer)
+ call transfer_sensitivity_kernels_acoustic_to_host(Mesh_pointer,rho_ac_kl,kappa_ac_kl,NSPEC_AB)
+ endif
+ endif
+
+ ! elastic wavefield
+ if( ELASTIC_SIMULATION ) then
+ call transfer_fields_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
+ if( SIMULATION_TYPE == 3 ) then
+ call transfer_b_fields_from_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel, Mesh_pointer)
+ call transfer_sensitivity_kernels_to_host(Mesh_pointer, rho_kl, mu_kl, kappa_kl,Sigma_kl, &
+ NSPEC_AB)
+ endif
+ endif
+
+ end subroutine it_transfer_from_GPU
Deleted: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/mesh_constants_cuda.h
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/mesh_constants_cuda.h 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/mesh_constants_cuda.h 2011-10-06 03:31:24 UTC (rev 19027)
@@ -1,93 +0,0 @@
-#ifndef GPU_MESH_
-#define GPU_MESH_
-#include <sys/types.h>
-#include <unistd.h>
-
-typedef struct mesh_ {
-
- int NGLLX; int NSPEC_AB;
- int NGLOB_AB;
- float* d_xix; float* d_xiy; float* d_xiz;
- float* d_etax; float* d_etay; float* d_etaz;
- float* d_gammax; float* d_gammay; float* d_gammaz;
- float* d_kappav; float* d_muv;
- int* d_ibool;
- float* d_displ; float* d_veloc; float* d_accel;
- float* d_b_displ; float* d_b_veloc; float* d_b_accel;
- int* d_phase_ispec_inner_elastic;
- int d_num_phase_ispec_elastic;
- float* d_rmass;
- float* d_send_accel_buffer;
- int* d_nibool_interfaces_ext_mesh;
- int* d_ibool_interfaces_ext_mesh;
-
- // used for writing seismograms
- int* d_number_receiver_global;
- int* d_ispec_selected_rec;
- int nrec_local;
- float* d_station_seismo_field;
- float* h_station_seismo_field;
-
- //used for absorbing stacey boundaries
- int* d_abs_boundary_ispec;
- int* d_abs_boundary_ijk;
- float* d_abs_boundary_normal;
- float* d_rho_vp;
- float* d_rho_vs;
- float* d_abs_boundary_jacobian2Dw;
- float* d_b_absorb_field;
- int* d_ispec_is_inner;
- int* d_ispec_is_elastic;
- float* d_sourcearrays;
- double* d_stf_pre_compute;
- int* d_islice_selected_source;
- int* d_ispec_selected_source;
-
- int* d_islice_selected_rec;
-
- // surface elements to save for noise tomography
- int* d_free_surface_ispec;
- int* d_free_surface_ijk;
- int num_free_surface_faces;
- float* d_noise_surface_movie;
-
- float* d_epsilondev_xx;
- float* d_epsilondev_yy;
- float* d_epsilondev_xy;
- float* d_epsilondev_xz;
- float* d_epsilondev_yz;
- float* d_epsilon_trace_over_3;
-
- float* d_normal_x_noise;
- float* d_normal_y_noise;
- float* d_normal_z_noise;
- float* d_mask_noise;
- float* d_free_surface_jacobian2Dw;
-
- float* d_wgllwgll_xy;
- float* d_wgllwgll_xz;
- float* d_wgllwgll_yz;
-
- float* d_noise_sourcearray;
-
- float* d_b_epsilondev_xx;
- float* d_b_epsilondev_yy;
- float* d_b_epsilondev_xy;
- float* d_b_epsilondev_xz;
- float* d_b_epsilondev_yz;
- float* d_b_epsilon_trace_over_3;
-
- // sensitivity kernels
- float* d_rho_kl;
- float* d_mu_kl;
- float* d_kappa_kl;
- float* d_Sigma_kl;
-
-
-} Mesh;
-
-void pause_for_debugger(int pause);
-
-void exit_on_cuda_error(char* kernel_name);
-
-#endif
Deleted: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/noise_tomography_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/noise_tomography_cuda.cu 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/noise_tomography_cuda.cu 2011-10-06 03:31:24 UTC (rev 19027)
@@ -1,241 +0,0 @@
-#include <stdio.h>
-#include <cuda.h>
-#include <cublas.h>
-#include <mpi.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#include "mesh_constants_cuda.h"
-
-// #include "epik_user.h"
-
-#define INDEX2(xsize,x,y) x + (y)*xsize
-#define INDEX3(xsize,ysize,x,y,z) x + (y)*xsize + (z)*xsize*ysize
-#define INDEX4(xsize,ysize,zsize,x,y,z,i) x + (y)*xsize + (z)*xsize*ysize + (i)*xsize*ysize*zsize
-#define INDEX5(xsize,ysize,zsize,isize,x,y,z,i,j) x + (y)*xsize + (z)*xsize*ysize + (i)*xsize*ysize*zsize + (j)*xsize*ysize*zsize*isize
-
-typedef float real;
-
-#define ENABLE_VERY_SLOW_ERROR_CHECKING
-
-__global__ void transfer_surface_to_host_kernel(int* free_surface_ispec,int* free_surface_ijk, int num_free_surface_faces, int* ibool, real* displ, real* noise_surface_movie) {
- int igll = threadIdx.x;
- int iface = blockIdx.x + blockIdx.y*gridDim.x;
-
- // int id = tx + blockIdx.x*blockDim.x + blockIdx.y*blockDim.x*gridDim.x;
-
- if(iface < num_free_surface_faces) {
- int ispec = free_surface_ispec[iface]-1; //-1 for C-based indexing
-
- int i = free_surface_ijk[0+3*(igll + 25*(iface))]-1;
- int j = free_surface_ijk[1+3*(igll + 25*(iface))]-1;
- int k = free_surface_ijk[2+3*(igll + 25*(iface))]-1;
-
- int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
-
- noise_surface_movie[INDEX3(3,25,0,igll,iface)] = displ[iglob*3];
- noise_surface_movie[INDEX3(3,25,1,igll,iface)] = displ[iglob*3+1];
- noise_surface_movie[INDEX3(3,25,2,igll,iface)] = displ[iglob*3+2];
- }
-}
-
-extern "C" void fortranflush_(int* rank)
-{
-
- fflush(stdout);
- fflush(stderr);
- printf("Flushing proc %d!\n",*rank);
-}
-
-extern "C" void fortranprint_(int* id) {
- int procid;
- MPI_Comm_rank(MPI_COMM_WORLD,&procid);
- printf("%d: sends msg_id %d\n",procid,*id);
-}
-
-extern "C" void fortranprintf_(float* val) {
- int procid;
- MPI_Comm_rank(MPI_COMM_WORLD,&procid);
- printf("%d: sends val %e\n",procid,*val);
-}
-
-extern "C" void fortranprintd_(double* val) {
- int procid;
- MPI_Comm_rank(MPI_COMM_WORLD,&procid);
- printf("%d: sends val %e\n",procid,*val);
-}
-
-// randomize displ for testing
-extern "C" void make_displ_rand_(long* Mesh_pointer_f,float* h_displ) {
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
- // float* displ_rnd = (float*)malloc(mp->NGLOB_AB*3*sizeof(float));
- for(int i=0;i<mp->NGLOB_AB*3;i++) {
- h_displ[i] = rand();
- }
- cudaMemcpy(mp->d_displ,h_displ,mp->NGLOB_AB*3*sizeof(float),cudaMemcpyHostToDevice);
-}
-
-extern "C" void transfer_surface_to_host_(long* Mesh_pointer_f,real* h_noise_surface_movie,int* num_free_surface_faces) {
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
- int num_blocks_x = *num_free_surface_faces;
- int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
- num_blocks_x = ceil(num_blocks_x/2.0);
- num_blocks_y = num_blocks_y*2;
- }
- dim3 grid(num_blocks_x,num_blocks_y,1);
- dim3 threads(25,1,1);
-
- transfer_surface_to_host_kernel<<<grid,threads>>>(mp->d_free_surface_ispec,mp->d_free_surface_ijk, *num_free_surface_faces, mp->d_ibool, mp->d_displ, mp->d_noise_surface_movie);
-
- cudaMemcpy(h_noise_surface_movie,mp->d_noise_surface_movie,3*25*(*num_free_surface_faces)*sizeof(real),cudaMemcpyDeviceToHost);
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- // sync and check to catch errors from previous async operations
- cudaThreadSynchronize();
- exit_on_cuda_error("transfer_surface_to_host");
-#endif
-
-}
-
-
-
-__global__ void noise_read_add_surface_movie_cuda_kernel(real* accel, int* ibool, int* free_surface_ispec,int* free_surface_ijk, int num_free_surface_faces, real* noise_surface_movie, real* normal_x_noise, real* normal_y_noise, real* normal_z_noise, real* mask_noise, real* free_surface_jacobian2Dw, real* wgllwgll_xy,float* d_debug) {
-
- int iface = blockIdx.x + gridDim.x*blockIdx.y; // surface element id
-
- // when nspec_top > 65535, but mod(nspec_top,2) > 0, we end up with an extra block.
- if(iface < num_free_surface_faces) {
- int ispec = free_surface_ispec[iface]-1;
-
- int igll = threadIdx.x;
-
- int ipoin = 25*iface + igll;
- int i=free_surface_ijk[0+3*(igll + 25*(iface))]-1;
- int j=free_surface_ijk[1+3*(igll + 25*(iface))]-1;
- int k=free_surface_ijk[2+3*(igll + 25*(iface))]-1;
-
- int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
-
- real normal_x = normal_x_noise[ipoin];
- real normal_y = normal_y_noise[ipoin];
- real normal_z = normal_z_noise[ipoin];
-
- real eta = (noise_surface_movie[INDEX3(3,25,0,igll,iface)]*normal_x +
- noise_surface_movie[INDEX3(3,25,1,igll,iface)]*normal_y +
- noise_surface_movie[INDEX3(3,25,2,igll,iface)]*normal_z);
-
- // error from cuda-memcheck and ddt seems "incorrect", because we
- // are passing a __constant__ variable pointer around like it was
- // made using cudaMalloc, which *may* be "incorrect", but produces
- // correct results.
-
- // ========= Invalid __global__ read of size
- // 4 ========= at 0x00000cd8 in
- // compute_add_sources_cuda.cu:260:noise_read_add_surface_movie_cuda_kernel
- // ========= by thread (0,0,0) in block (3443,0) ========= Address
- // 0x203000c8 is out of bounds
-
- // non atomic version for speed testing -- atomic updates are needed for correctness
- // accel[3*iglob] += eta*mask_noise[ipoin] * normal_x * wgllwgll_xy[tx] * free_surface_jacobian2Dw[tx + 25*ispec2D];
- // accel[3*iglob+1] += eta*mask_noise[ipoin] * normal_y * wgllwgll_xy[tx] * free_surface_jacobian2Dw[tx + 25*ispec2D];
- // accel[3*iglob+2] += eta*mask_noise[ipoin] * normal_z * wgllwgll_xy[tx] * free_surface_jacobian2Dw[tx + 25*ispec2D];
-
- // Fortran version in SVN -- note deletion of wgllwgll_xy?
- // accel(1,iglob) = accel(1,iglob) + eta * mask_noise(ipoin) * normal_x_noise(ipoin) &
- // * free_surface_jacobian2Dw(igll,iface)
- // accel(2,iglob) = accel(2,iglob) + eta * mask_noise(ipoin) * normal_y_noise(ipoin) &
- // * free_surface_jacobian2Dw(igll,iface)
- // accel(3,iglob) = accel(3,iglob) + eta * mask_noise(ipoin) * normal_z_noise(ipoin) &
- // * free_surface_jacobian2Dw(igll,iface) ! wgllwgll_xy(i,j) * jacobian2D_top(i,j,iface)
-
- // atomicAdd(&accel[iglob*3] ,eta*mask_noise[ipoin]*normal_x*wgllwgll_xy[tx]*free_surface_jacobian2Dw[igll+25*iface]);
- // atomicAdd(&accel[iglob*3+1],eta*mask_noise[ipoin]*normal_y*wgllwgll_xy[tx]*free_surface_jacobian2Dw[igll+25*iface]);
- // atomicAdd(&accel[iglob*3+2],eta*mask_noise[ipoin]*normal_z*wgllwgll_xy[tx]*free_surface_jacobian2Dw[igll+25*iface]);
-
- atomicAdd(&accel[iglob*3] ,eta*mask_noise[ipoin]*normal_x*free_surface_jacobian2Dw[igll+25*iface]);
- atomicAdd(&accel[iglob*3+1],eta*mask_noise[ipoin]*normal_y*free_surface_jacobian2Dw[igll+25*iface]);
- atomicAdd(&accel[iglob*3+2],eta*mask_noise[ipoin]*normal_z*free_surface_jacobian2Dw[igll+25*iface]);
-
- }
-}
-
-extern "C" void noise_read_add_surface_movie_cuda_(long* Mesh_pointer_f, real* h_noise_surface_movie, int* num_free_surface_faces_f,int* NOISE_TOMOGRAPHYf) {
-
- // EPIK_TRACER("noise_read_add_surface_movie_cuda");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
- int num_free_surface_faces = *num_free_surface_faces_f;
- int NOISE_TOMOGRAPHY = *NOISE_TOMOGRAPHYf;
- float* d_noise_surface_movie;
- cudaMalloc((void**)&d_noise_surface_movie,3*25*num_free_surface_faces*sizeof(float));
- cudaMemcpy(d_noise_surface_movie, h_noise_surface_movie,3*25*num_free_surface_faces*sizeof(real),cudaMemcpyHostToDevice);
-
- int num_blocks_x = num_free_surface_faces;
- int num_blocks_y = 1;
- while(num_blocks_x > 65535) {
- num_blocks_x = ceil(num_blocks_x/2.0);
- num_blocks_y = num_blocks_y*2;
- }
- dim3 grid(num_blocks_x,num_blocks_y,1);
- dim3 threads(25,1,1);
-
- // float* h_debug = (float*)calloc(128,sizeof(float));
- float* d_debug;
- // cudaMalloc((void**)&d_debug,128*sizeof(float));
- // cudaMemcpy(d_debug,h_debug,128*sizeof(float),cudaMemcpyHostToDevice);
-
- if(NOISE_TOMOGRAPHY == 2) { // add surface source to forward field
- noise_read_add_surface_movie_cuda_kernel<<<grid,threads>>>(mp->d_accel,
- mp->d_ibool,
- mp->d_free_surface_ispec,
- mp->d_free_surface_ijk,
- num_free_surface_faces,
- d_noise_surface_movie,
- mp->d_normal_x_noise,
- mp->d_normal_y_noise,
- mp->d_normal_z_noise,
- mp->d_mask_noise,
- mp->d_free_surface_jacobian2Dw,
- mp->d_wgllwgll_xy,
- d_debug);
- }
- else if(NOISE_TOMOGRAPHY==3) { // add surface source to adjoint (backward) field
- noise_read_add_surface_movie_cuda_kernel<<<grid,threads>>>(mp->d_b_accel,
- mp->d_ibool,
- mp->d_free_surface_ispec,
- mp->d_free_surface_ijk,
- num_free_surface_faces,
- d_noise_surface_movie,
- mp->d_normal_x_noise,
- mp->d_normal_y_noise,
- mp->d_normal_z_noise,
- mp->d_mask_noise,
- mp->d_free_surface_jacobian2Dw,
- mp->d_wgllwgll_xy,
- d_debug);
- }
-
-
- // cudaMemcpy(h_debug,d_debug,128*sizeof(float),cudaMemcpyDeviceToHost);
- // for(int i=0;i<8;i++) {
- // printf("debug[%d]= %e\n",i,h_debug[i]);
- // }
- // MPI_Abort(MPI_COMM_WORLD,1);
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
-
- exit_on_cuda_error("noise_read_add_surface_movie_cuda_kernel");
- // sync and check to catch errors from previous async operations
- // cudaThreadSynchronize();
- // cudaError_t err = cudaGetLastError();
- // if (err != cudaSuccess)
- // {
- // fprintf(stderr,"Error launching/running noise_read_add_surface_movie_cuda_kernel: %s\n", cudaGetErrorString(err));
- // exit(1);
- // }
-#endif
-
- cudaFree(d_noise_surface_movie);
-}
Deleted: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_constants_cuda.h
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_constants_cuda.h 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_constants_cuda.h 2011-10-06 03:31:24 UTC (rev 19027)
@@ -1,83 +0,0 @@
-#ifndef CUDA_HEADER_H
-#define CUDA_HEADER_H
-/* CUDA specific things from specfem3D_kernels.cu */
-
-#define NGLL2 25
-
-
-#ifdef USE_TEXTURES
-// declaration of textures
-texture<float, 1, cudaReadModeElementType> tex_displ;
-texture<float, 1, cudaReadModeElementType> tex_accel;
-
-// for binding the textures
-
- void bindTexturesDispl(float* d_displ)
- {
- cudaError_t err;
-
- cudaChannelFormatDesc channelDescFloat = cudaCreateChannelDesc<float>();
-
- err = cudaBindTexture(NULL,tex_displ, d_displ, channelDescFloat, NDIM*NGLOB*sizeof(float));
- if (err != cudaSuccess)
- {
- fprintf(stderr, "Error in bindTexturesDispl for displ: %s\n", cudaGetErrorString(err));
- exit(1);
- }
- }
-
- void bindTexturesAccel(float* d_accel)
- {
- cudaError_t err;
-
- cudaChannelFormatDesc channelDescFloat = cudaCreateChannelDesc<float>();
-
- err = cudaBindTexture(NULL,tex_accel, d_accel, channelDescFloat, NDIM*NGLOB*sizeof(float));
- if (err != cudaSuccess)
- {
- fprintf(stderr, "Error in bindTexturesAccel for accel: %s\n", cudaGetErrorString(err));
- exit(1);
- }
- }
-
-#endif
-
-// setters for these const arrays (very ugly hack, but will have to do)
-
-void setConst_hprime_xx(float* array);
-
-void setConst_hprimewgll_xx(float* array);
-
-void setConst_wgllwgll_xy(float* array,Mesh* mp);
-
-void setConst_wgllwgll_xz(float* array, Mesh* mp);
-
-void setConst_wgllwgll_yz(float* array, Mesh* mp);
-void exit_on_cuda_error(char* kernel_name);
-
-void show_free_memory(char* info_str);
-
-void print_CUDA_error_if_any(cudaError_t err, int num)
-{
- if (cudaSuccess != err)
- {
- printf("\nCUDA error !!!!! <%s> !!!!! at CUDA call # %d\n",cudaGetErrorString(err),num);
- pause_for_debugger(1);
- show_free_memory("after error\n");
- fflush(stdout);
-#ifdef USE_MPI
- MPI_Abort(MPI_COMM_WORLD,1);
-#endif
- exit(0);
- }
- return;
-}
-
-
-
-
-
-
-
-
-#endif //CUDA_HEADER_H
Deleted: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_mesh_constants_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_mesh_constants_cuda.cu 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_mesh_constants_cuda.cu 2011-10-06 03:31:24 UTC (rev 19027)
@@ -1,1013 +0,0 @@
-#include <stdio.h>
-#include <cuda.h>
-#include <cublas.h>
-#include <mpi.h>
-
-#include <sys/time.h>
-#include <sys/resource.h>
-
-#include "mesh_constants_cuda.h"
-
-#include "prepare_constants_cuda.h"
-
-#define MAX(x,y) (((x) < (y)) ? (y) : (x))
-
-typedef float real;
-
-extern "C" void pause_for_debug_() {
- pause_for_debugger(1);
-}
-
-void pause_for_debugger(int pause) {
- if(pause) {
- int myrank;
- MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
- printf("I'm rank %d\n",myrank);
- int i = 0;
- char hostname[256];
- gethostname(hostname, sizeof(hostname));
- printf("PID %d on %s:%d ready for attach\n", getpid(), hostname,myrank);
- FILE *file = fopen("/scratch/eiger/rietmann/attach_gdb.txt","w+");
- fprintf(file,"PID %d on %s:%d ready for attach\n", getpid(), hostname,myrank);
- fclose(file);
- fflush(stdout);
- while (0 == i)
- sleep(5);
- }
-}
-
-void exit_on_cuda_error(char* kernel_name) {
-
-cudaThreadSynchronize();
- cudaError_t err = cudaGetLastError();
- if (err != cudaSuccess)
- {
- fprintf(stderr,"Error after %s: %s\n", kernel_name, cudaGetErrorString(err));
- pause_for_debugger(0);
- exit(1);
- }
-}
-
-// Saves GPU memory usage to file
-void output_free_memory(char* info_str) {
- int proc;
- MPI_Comm_rank(MPI_COMM_WORLD,&proc);
- FILE* fp;
- char filename[BUFSIZ];
- sprintf(filename,"../in_out_files/OUTPUT_FILES/gpu_mem_usage_proc_%03d.txt",proc);
- fp = fopen(filename,"a+");
-
- size_t free_byte ;
- size_t total_byte ;
- cudaError_t cuda_status = cudaMemGetInfo( &free_byte, &total_byte ) ;
- if ( cudaSuccess != cuda_status ){
- printf("Error: cudaMemGetInfo fails, %s \n", cudaGetErrorString(cuda_status) );
- exit(1);
- }
-
- double free_db = (double)free_byte ;
- double total_db = (double)total_byte ;
- double used_db = total_db - free_db ;
- fprintf(fp,"%d: @%s GPU memory usage: used = %f, free = %f MB, total = %f MB\n", proc, info_str,
- used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
-}
-
-// Fortran-callable version of above method
-extern "C" void output_free_memory_(int* id) {
- char info[6];
- sprintf(info,"f %d:",id);
- output_free_memory(info);
-}
-
-void show_free_memory(char* info_str) {
-
- // show memory usage of GPU
- int proc;
- MPI_Comm_rank(MPI_COMM_WORLD,&proc);
-
- size_t free_byte ;
- size_t total_byte ;
- cudaError_t cuda_status = cudaMemGetInfo( &free_byte, &total_byte ) ;
- if ( cudaSuccess != cuda_status ){
- printf("Error: cudaMemGetInfo fails, %s \n", cudaGetErrorString(cuda_status) );
- exit(1);
- }
-
- double free_db = (double)free_byte ;
- double total_db = (double)total_byte ;
- double used_db = total_db - free_db ;
- printf("%d: @%s GPU memory usage: used = %f, free = %f MB, total = %f MB\n", proc, info_str,
- used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
-
-}
-
-extern "C" void show_free_device_memory_() {
- show_free_memory("from fortran");
-}
-
-void prepare_constants(int NGLLX, int NSPEC_AB, int NGLOB_AB,
- float* h_xix, float* h_xiy, float* h_xiz,
- float** d_xix, float** d_xiy, float** d_xiz,
- float* h_etax, float* h_etay, float* h_etaz,
- float** d_etax, float** d_etay, float** d_etaz,
- float* h_gammax, float* h_gammay, float* h_gammaz,
- float** d_gammax, float** d_gammay, float** d_gammaz,
- float* h_kappav, float* h_muv,
- float** d_kappav, float** d_muv,
- int* h_ibool, int** d_ibool,
- int* h_phase_ispec_inner_elastic, int** d_phase_ispec_inner_elastic,
- int num_phase_ispec_elastic,
- float* h_rmass, float** d_rmass,
- int num_interfaces_ext_mesh, int max_nibool_interfaces_ext_mesh,
- int* h_nibool_interfaces_ext_mesh, int** d_nibool_interfaces_ext_mesh,
- int* h_ibool_interfaces_ext_mesh, int** d_ibool_interfaces_ext_mesh,
- float* h_hprime_xx, float* h_hprimewgll_xx,
- float* h_wgllwgll_xy, float* h_wgllwgll_xz,
- float* h_wgllwgll_yz,
- int* h_abs_boundary_ispec, int** d_abs_boundary_ispec,
- int* h_abs_boundary_ijk, int** d_abs_boundary_ijk,
- float* h_abs_boundary_normal, float** d_abs_boundary_normal,
- float* h_rho_vp,float** d_rho_vp,
- float* h_rho_vs,float** d_rho_vs,
- float* h_abs_boundary_jacobian2Dw,float** d_abs_boundary_jacobian2Dw,
- float* h_b_absorb_field,float** d_b_absorb_field,
- int num_abs_boundary_faces, int b_num_abs_boundary_faces,
- int* h_ispec_is_inner, int** d_ispec_is_inner,
- int* h_ispec_is_elastic, int** d_ispec_is_elastic,
- int NSOURCES,
- float* h_sourcearrays,float** d_sourcearrays,
- int* h_islice_selected_source, int** d_islice_selected_source,
- int* h_ispec_selected_source, int** d_ispec_selected_source
- )
-{
-
- // EPIK_USER_REG(r_name,"compute_forces");
- // EPIK_USER_REG(r_name,
-
- /* Assuming NGLLX=5. Padded is then 128 (5^3+3) */
- int size_padded = 128*NSPEC_AB;
- int size = NGLLX*NGLLX*NGLLX*NSPEC_AB;
-
- print_CUDA_error_if_any(cudaMalloc((void**) d_xix, size_padded*sizeof(float)),5);
- print_CUDA_error_if_any(cudaMalloc((void**) d_xiy, size_padded*sizeof(float)),6);
- print_CUDA_error_if_any(cudaMalloc((void**) d_xiz, size_padded*sizeof(float)),7);
- print_CUDA_error_if_any(cudaMalloc((void**) d_etax, size_padded*sizeof(float)),8);
- print_CUDA_error_if_any(cudaMalloc((void**) d_etay, size_padded*sizeof(float)),9);
- print_CUDA_error_if_any(cudaMalloc((void**) d_etaz, size_padded*sizeof(float)),10);
- print_CUDA_error_if_any(cudaMalloc((void**) d_gammax, size_padded*sizeof(float)),11);
- print_CUDA_error_if_any(cudaMalloc((void**) d_gammay, size_padded*sizeof(float)),12);
- print_CUDA_error_if_any(cudaMalloc((void**) d_gammaz, size_padded*sizeof(float)),13);
- print_CUDA_error_if_any(cudaMalloc((void**) d_kappav, size_padded*sizeof(float)),14);
- print_CUDA_error_if_any(cudaMalloc((void**) d_muv, size_padded*sizeof(float)),15);
- print_CUDA_error_if_any(cudaMalloc((void**) d_ibool, size_padded*sizeof(int)),16);
- print_CUDA_error_if_any(cudaMalloc((void**) d_phase_ispec_inner_elastic, num_phase_ispec_elastic*2*sizeof(int)),17);
- print_CUDA_error_if_any(cudaMalloc((void**) d_rmass, NGLOB_AB*sizeof(float)),17);
-
- print_CUDA_error_if_any(cudaMalloc((void**) d_abs_boundary_ispec,
- num_abs_boundary_faces*sizeof(int)),69);
- print_CUDA_error_if_any(cudaMemcpy(*d_abs_boundary_ispec, h_abs_boundary_ispec,
- num_abs_boundary_faces*sizeof(int),
- cudaMemcpyHostToDevice),70);
-
- print_CUDA_error_if_any(cudaMalloc((void**) d_abs_boundary_ijk,
- 3*25*num_abs_boundary_faces*sizeof(int)),2);
- print_CUDA_error_if_any(cudaMemcpy(*d_abs_boundary_ijk, h_abs_boundary_ijk,
- 3*25*num_abs_boundary_faces*sizeof(int),
- cudaMemcpyHostToDevice),2);
-
- print_CUDA_error_if_any(cudaMalloc((void**) d_abs_boundary_normal,
- 3*25*num_abs_boundary_faces*sizeof(int)),3);
- print_CUDA_error_if_any(cudaMemcpy(*d_abs_boundary_normal, h_abs_boundary_normal,
- 3*25*num_abs_boundary_faces*sizeof(int),
- cudaMemcpyHostToDevice),3);
-
- print_CUDA_error_if_any(cudaMalloc((void**) d_abs_boundary_jacobian2Dw,
- 25*num_abs_boundary_faces*sizeof(float)),4);
- print_CUDA_error_if_any(cudaMemcpy(*d_abs_boundary_jacobian2Dw, h_abs_boundary_jacobian2Dw,
- 25*num_abs_boundary_faces*sizeof(float),
- cudaMemcpyHostToDevice),1);
-
- print_CUDA_error_if_any(cudaMalloc((void**) d_rho_vp, size*sizeof(float)),5);
- print_CUDA_error_if_any(cudaMalloc((void**) d_rho_vs, size*sizeof(float)),6);
- print_CUDA_error_if_any(cudaMemcpy(*d_rho_vp,h_rho_vp,size*sizeof(float),
- cudaMemcpyHostToDevice),5);
- print_CUDA_error_if_any(cudaMemcpy(*d_rho_vs,h_rho_vs,size*sizeof(float),
- cudaMemcpyHostToDevice),5);
-
- print_CUDA_error_if_any(cudaMalloc((void**) d_b_absorb_field, 3*25*b_num_abs_boundary_faces*sizeof(float)),7);
- print_CUDA_error_if_any(cudaMemcpy(*d_b_absorb_field, h_b_absorb_field,
- 3*25*b_num_abs_boundary_faces*sizeof(float),
- cudaMemcpyHostToDevice),7);
-
- print_CUDA_error_if_any(cudaMemcpy(*d_rmass,h_rmass,NGLOB_AB*sizeof(float),cudaMemcpyHostToDevice),18);
-
- // prepare interprocess-edge exchange information
- print_CUDA_error_if_any(cudaMalloc((void**) d_nibool_interfaces_ext_mesh,
- num_interfaces_ext_mesh*sizeof(int)),19);
- print_CUDA_error_if_any(cudaMemcpy(*d_nibool_interfaces_ext_mesh,h_nibool_interfaces_ext_mesh,
- num_interfaces_ext_mesh*sizeof(int),cudaMemcpyHostToDevice),19);
-
- print_CUDA_error_if_any(cudaMalloc((void**) d_ibool_interfaces_ext_mesh,
- num_interfaces_ext_mesh*max_nibool_interfaces_ext_mesh*sizeof(int)),20);
- print_CUDA_error_if_any(cudaMemcpy(*d_ibool_interfaces_ext_mesh,h_ibool_interfaces_ext_mesh,
- num_interfaces_ext_mesh*max_nibool_interfaces_ext_mesh*sizeof(int),
- cudaMemcpyHostToDevice),20);
-
- print_CUDA_error_if_any(cudaMalloc((void**) d_ispec_is_inner,NSPEC_AB*sizeof(int)),21);
- print_CUDA_error_if_any(cudaMemcpy(*d_ispec_is_inner, h_ispec_is_inner,
- NSPEC_AB*sizeof(int),
- cudaMemcpyHostToDevice),21);
- print_CUDA_error_if_any(cudaMalloc((void**) d_ispec_is_elastic,NSPEC_AB*sizeof(int)),21);
- print_CUDA_error_if_any(cudaMemcpy(*d_ispec_is_elastic, h_ispec_is_elastic,
- NSPEC_AB*sizeof(int),
- cudaMemcpyHostToDevice),21);
-
- print_CUDA_error_if_any(cudaMemcpy(*d_ibool, h_ibool,
- size*sizeof(int) ,cudaMemcpyHostToDevice),12);
-
- print_CUDA_error_if_any(cudaMalloc((void**)d_sourcearrays, sizeof(float)*NSOURCES*3*125),22);
- print_CUDA_error_if_any(cudaMemcpy(*d_sourcearrays, h_sourcearrays, sizeof(float)*NSOURCES*3*125,
- cudaMemcpyHostToDevice),22);
-
- print_CUDA_error_if_any(cudaMalloc((void**)d_islice_selected_source, sizeof(int)*NSOURCES),23);
- print_CUDA_error_if_any(cudaMemcpy(*d_islice_selected_source, h_islice_selected_source, sizeof(int)*NSOURCES,
- cudaMemcpyHostToDevice),23);
-
- print_CUDA_error_if_any(cudaMalloc((void**)d_ispec_selected_source, sizeof(int)*NSOURCES),24);
- print_CUDA_error_if_any(cudaMemcpy(*d_ispec_selected_source, h_ispec_selected_source,sizeof(int)*NSOURCES,
- cudaMemcpyHostToDevice),24);
-
- // transfer constant element data with padding
- for(int i=0;i<NSPEC_AB;i++) {
- print_CUDA_error_if_any(cudaMemcpy(*d_xix + i*128, &h_xix[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),69);
- print_CUDA_error_if_any(cudaMemcpy(*d_xiy+i*128, &h_xiy[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),11);
- print_CUDA_error_if_any(cudaMemcpy(*d_xiz+i*128, &h_xiz[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),3);
- print_CUDA_error_if_any(cudaMemcpy(*d_etax+i*128, &h_etax[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),4);
- print_CUDA_error_if_any(cudaMemcpy(*d_etay+i*128, &h_etay[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),5);
- print_CUDA_error_if_any(cudaMemcpy(*d_etaz+i*128, &h_etaz[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),6);
- print_CUDA_error_if_any(cudaMemcpy(*d_gammax+i*128,&h_gammax[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),7);
- print_CUDA_error_if_any(cudaMemcpy(*d_gammay+i*128,&h_gammay[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),8);
- print_CUDA_error_if_any(cudaMemcpy(*d_gammaz+i*128,&h_gammaz[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),9);
- print_CUDA_error_if_any(cudaMemcpy(*d_kappav+i*128,&h_kappav[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),10);
- print_CUDA_error_if_any(cudaMemcpy(*d_muv+i*128, &h_muv[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),11);
-
- }
-
-
-
- print_CUDA_error_if_any(cudaMemcpy(*d_phase_ispec_inner_elastic, h_phase_ispec_inner_elastic, num_phase_ispec_elastic*2*sizeof(int),cudaMemcpyHostToDevice),13);
-
-
-
-
-
-}
-
-
-extern "C" void prepare_constants_device_(long* Mesh_pointer,int* NGLLX, int* NSPEC_AB, int* NGLOB_AB,
- float* h_xix, float* h_xiy, float* h_xiz,
- float* h_etax, float* h_etay, float* h_etaz,
- float* h_gammax, float* h_gammay, float* h_gammaz,
- float* h_kappav, float* h_muv,
- int* h_ibool, int* h_phase_ispec_inner_elastic,
- int* num_phase_ispec_elastic,
- float* h_rmass,
- int* num_interfaces_ext_mesh, int* max_nibool_interfaces_ext_mesh,
- int* h_nibool_interfaces_ext_mesh, int* h_ibool_interfaces_ext_mesh,
- float* h_hprime_xx, float* h_hprimewgll_xx,
- float* h_wgllwgll_xy, float* h_wgllwgll_xz,
- float* h_wgllwgll_yz,
- int* h_abs_boundary_ispec, int* h_abs_boundary_ijk,
- float* h_abs_boundary_normal,
- float* h_rho_vp,
- float* h_rho_vs,
- float* h_abs_boundary_jacobian2Dw,
- float* h_b_absorb_field,
- int* num_abs_boundary_faces, int* b_num_abs_boundary_faces,
- int* h_ispec_is_inner, int* h_ispec_is_elastic,
- int* NSOURCES,
- float* h_sourcearrays,
- int* h_islice_selected_source,
- int* h_ispec_selected_source,
- int* h_number_receiver_global,
- int* h_ispec_selected_rec,
- int* nrec_local_f,
- int* nrec_f
- ) {
-
- int device_count,procid;
- cuInit(0);
- cudaGetDeviceCount(&device_count);
- MPI_Comm_rank(MPI_COMM_WORLD, &procid);
- if(device_count > 1) {
- cudaSetDevice((procid)%2);
- exit_on_cuda_error("cudaSetDevice");
- }
-
- printf("GPU_MODE Active. Preparing Fields and Constants on Device.\n");
-
- Mesh* mp = (Mesh*)malloc(sizeof(Mesh));
- *Mesh_pointer = (long)mp;
-
- mp->NGLLX = *NGLLX;
- mp->NSPEC_AB = *NSPEC_AB;
- mp->NGLOB_AB = *NGLOB_AB;
- mp->d_num_phase_ispec_elastic = *num_phase_ispec_elastic;
- setConst_hprime_xx (h_hprime_xx );
- setConst_hprimewgll_xx(h_hprimewgll_xx);
- setConst_wgllwgll_xy (h_wgllwgll_xy,mp);
- setConst_wgllwgll_xz (h_wgllwgll_xz,mp);
- setConst_wgllwgll_yz (h_wgllwgll_yz,mp);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_stf_pre_compute),
- *NSOURCES*sizeof(double)),1);
-
- int size_padded = 128* *NSPEC_AB;
- int size = *NGLLX * *NGLLX * *NGLLX * *NSPEC_AB;
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xix, size_padded*sizeof(float)),5);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xiy, size_padded*sizeof(float)),6);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_xiz, size_padded*sizeof(float)),7);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etax, size_padded*sizeof(float)),8);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etay, size_padded*sizeof(float)),9);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_etaz, size_padded*sizeof(float)),10);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammax, size_padded*sizeof(float)),11);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammay, size_padded*sizeof(float)),12);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_gammaz, size_padded*sizeof(float)),13);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_kappav, size_padded*sizeof(float)),14);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_muv, size_padded*sizeof(float)),15);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibool, size_padded*sizeof(int)),16);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibool_interfaces_ext_mesh,
- *num_interfaces_ext_mesh* *max_nibool_interfaces_ext_mesh*
- sizeof(int)),20);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_phase_ispec_inner_elastic, *num_phase_ispec_elastic*2*sizeof(int)),17);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_phase_ispec_inner_elastic, h_phase_ispec_inner_elastic, *num_phase_ispec_elastic*2*sizeof(int),cudaMemcpyHostToDevice),13);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_rmass, *NGLOB_AB*sizeof(float)),17);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass,h_rmass,*NGLOB_AB*sizeof(float),cudaMemcpyHostToDevice),18);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_abs_boundary_ispec,
- *num_abs_boundary_faces*sizeof(int)),69);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_ispec, h_abs_boundary_ispec,
- *num_abs_boundary_faces*sizeof(int),
- cudaMemcpyHostToDevice),70);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_abs_boundary_ijk,
- 3*25**num_abs_boundary_faces*sizeof(int)),2);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_ijk, h_abs_boundary_ijk,
- 3*25**num_abs_boundary_faces*sizeof(int),
- cudaMemcpyHostToDevice),2);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_abs_boundary_normal,
- 3*25**num_abs_boundary_faces*sizeof(int)),3);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_normal, h_abs_boundary_normal,
- 3*25**num_abs_boundary_faces*sizeof(int),
- cudaMemcpyHostToDevice),3);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_abs_boundary_jacobian2Dw,
- 25**num_abs_boundary_faces*sizeof(float)),4);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_abs_boundary_jacobian2Dw, h_abs_boundary_jacobian2Dw,
- 25**num_abs_boundary_faces*sizeof(float),
- cudaMemcpyHostToDevice),1);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_rho_vp, size*sizeof(float)),5);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_rho_vs, size*sizeof(float)),6);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_vp,h_rho_vp,size*sizeof(float),
- cudaMemcpyHostToDevice),5);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_rho_vs,h_rho_vs,size*sizeof(float),
- cudaMemcpyHostToDevice),5);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_b_absorb_field, 3*25* *b_num_abs_boundary_faces*sizeof(float)),7);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_field, h_b_absorb_field,
- 3*25* *b_num_abs_boundary_faces*sizeof(float),
- cudaMemcpyHostToDevice),7);
-
-
- // prepare interprocess-edge exchange information
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_nibool_interfaces_ext_mesh,
- *num_interfaces_ext_mesh*sizeof(int)),19);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_nibool_interfaces_ext_mesh,h_nibool_interfaces_ext_mesh,
- *num_interfaces_ext_mesh*sizeof(int),cudaMemcpyHostToDevice),19);
-
-
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ibool_interfaces_ext_mesh,h_ibool_interfaces_ext_mesh,
- *num_interfaces_ext_mesh* *max_nibool_interfaces_ext_mesh*sizeof(int),
- cudaMemcpyHostToDevice),20);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ispec_is_inner,*NSPEC_AB*sizeof(int)),21);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_inner, h_ispec_is_inner,
- *NSPEC_AB*sizeof(int),
- cudaMemcpyHostToDevice),21);
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ispec_is_elastic,*NSPEC_AB*sizeof(int)),21);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_elastic, h_ispec_is_elastic,
- *NSPEC_AB*sizeof(int),
- cudaMemcpyHostToDevice),21);
-
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ibool, h_ibool,
- size*sizeof(int) ,cudaMemcpyHostToDevice),12);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_sourcearrays, sizeof(float)* *NSOURCES*3*125),22);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_sourcearrays, h_sourcearrays, sizeof(float)* *NSOURCES*3*125,
- cudaMemcpyHostToDevice),22);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_islice_selected_source, sizeof(int) * *NSOURCES),23);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_islice_selected_source, h_islice_selected_source, sizeof(int)* *NSOURCES,
- cudaMemcpyHostToDevice),23);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_ispec_selected_source, sizeof(int)* *NSOURCES),24);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_selected_source, h_ispec_selected_source,sizeof(int)* *NSOURCES,
- cudaMemcpyHostToDevice),24);
-
- // transfer constant element data with padding
- for(int i=0;i<*NSPEC_AB;i++) {
- print_CUDA_error_if_any(cudaMemcpy(mp->d_xix + i*128, &h_xix[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),69);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_xiy+i*128, &h_xiy[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),11);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_xiz+i*128, &h_xiz[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),3);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_etax+i*128, &h_etax[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),4);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_etay+i*128, &h_etay[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),5);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_etaz+i*128, &h_etaz[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),6);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_gammax+i*128,&h_gammax[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),7);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_gammay+i*128,&h_gammay[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),8);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_gammaz+i*128,&h_gammaz[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),9);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_kappav+i*128,&h_kappav[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),10);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_muv+i*128, &h_muv[i*125],
- 125*sizeof(float),cudaMemcpyHostToDevice),11);
-
- }
-
- int nrec_local = *nrec_local_f;
- int nrec = *nrec_f;
-
- // note that:
- // size(number_receiver_global) = nrec_local
- // size(ispec_selected_rec) = nrec
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_number_receiver_global),nrec_local*sizeof(int)),1);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ispec_selected_rec),nrec*sizeof(int)),2);
- cudaMemcpy(mp->d_number_receiver_global,h_number_receiver_global,nrec_local*sizeof(int),
- cudaMemcpyHostToDevice);
-
- cudaMemcpy(mp->d_ispec_selected_rec,h_ispec_selected_rec,nrec*sizeof(int),
- cudaMemcpyHostToDevice);
-
- mp->nrec_local = nrec_local;
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_station_seismo_field),3*125*nrec_local*sizeof(float)),3);
- mp->h_station_seismo_field = (float*)malloc(3*125*nrec_local*sizeof(float));
-
-}
-
-extern "C" void prepare_and_transfer_noise_backward_fields_(long* Mesh_pointer_f,
- int* size,
- real* b_displ,
- real* b_veloc,
- real* b_accel,
- real* b_epsilondev_xx,
- real* b_epsilondev_yy,
- real* b_epsilondev_xy,
- real* b_epsilondev_xz,
- real* b_epsilondev_yz,
- int* NSPEC_STRAIN_ONLY) {
- //show_free_memory("prep_and_xfer_noise_bwd_fields");
- Mesh* mp = (Mesh*)(*Mesh_pointer_f);
- int epsilondev_size = 128*(*NSPEC_STRAIN_ONLY);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_displ),*size*sizeof(real)),1);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_veloc),*size*sizeof(real)),2);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_accel),*size*sizeof(real)),3);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xx),
- epsilondev_size*sizeof(real)),4);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_yy),
- epsilondev_size*sizeof(real)),4);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xy),
- epsilondev_size*sizeof(real)),4);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xz),
- epsilondev_size*sizeof(real)),4);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_yz),
- epsilondev_size*sizeof(real)),4);
-
-
- cudaMemcpy(mp->d_b_displ,b_displ,*size*sizeof(real),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_b_veloc,b_veloc,*size*sizeof(real),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_b_accel,b_accel,*size*sizeof(real),cudaMemcpyHostToDevice);
-
- cudaMemcpy(mp->d_b_epsilondev_xx,b_epsilondev_xx,
- epsilondev_size*sizeof(real),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_b_epsilondev_yy,b_epsilondev_yy,
- epsilondev_size*sizeof(real),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_b_epsilondev_xy,b_epsilondev_xy,
- epsilondev_size*sizeof(real),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_b_epsilondev_xz,b_epsilondev_xz,
- epsilondev_size*sizeof(real),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_b_epsilondev_yz,b_epsilondev_yz,
- epsilondev_size*sizeof(real),cudaMemcpyHostToDevice);
-
-}
-extern "C"
-void prepare_and_transfer_noise_backward_constants_(long* Mesh_pointer_f,
- float* normal_x_noise,
- float* normal_y_noise,
- float* normal_z_noise,
- float* mask_noise,
- float* free_surface_jacobian2Dw,
- int* nfaces_surface_ext_mesh
- ) {
-
- //show_free_memory("prep_and_xfer_noise_bwd_constants");
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- int nface_size = 5*5*(*nfaces_surface_ext_mesh);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_x_noise,
- nface_size*sizeof(float)),1);
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_y_noise,
- nface_size*sizeof(float)),2);
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_z_noise,
- nface_size*sizeof(float)),3);
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_mask_noise, nface_size*sizeof(float)),4);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_free_surface_jacobian2Dw,
- nface_size*sizeof(float)),5);
-
- cudaMemcpy(mp->d_normal_x_noise, normal_x_noise, nface_size*sizeof(float),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_normal_y_noise, normal_y_noise, nface_size*sizeof(float),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_normal_z_noise, normal_z_noise, nface_size*sizeof(float),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_mask_noise, mask_noise, nface_size*sizeof(float),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_free_surface_jacobian2Dw, free_surface_jacobian2Dw, nface_size*sizeof(float),cudaMemcpyHostToDevice);
- printf("jacobian_size = %d\n",25*(*nfaces_surface_ext_mesh));
-}
-
-extern "C" void prepare_noise_constants_device_(long* Mesh_pointer_f, int* NGLLX,
- int* NSPEC_AB, int* NGLOB_AB,
- int* free_surface_ispec,int* free_surface_ijk,
- int* num_free_surface_faces,
- int* size_free_surface_ijk, int* SIMULATION_TYPE) {
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f);
-
- mp->num_free_surface_faces = *num_free_surface_faces;
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_free_surface_ispec, *num_free_surface_faces*sizeof(int)),1);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ispec, free_surface_ispec, *num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),1);
-
- // alloc storage for the surface buffer to be copied
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_noise_surface_movie, 3*25*(*num_free_surface_faces)*sizeof(float)),1);
-
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_free_surface_ijk, (*size_free_surface_ijk)*sizeof(float)),1);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ijk,free_surface_ijk,(*size_free_surface_ijk)*sizeof(float),cudaMemcpyHostToDevice),1);
-
-}
-
-extern "C" void prepare_sensitivity_kernels_(long* Mesh_pointer_f,
- float* rho_kl,
- float* mu_kl,
- float* kappa_kl,
- float* epsilon_trace_over_3,
- float* b_epsilon_trace_over_3,
- float* Sigma_kl,
- int* NSPEC_ADJOINTf) {
- Mesh* mp = (Mesh*)(*Mesh_pointer_f);
- int NSPEC_ADJOINT = *NSPEC_ADJOINTf;
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_kl),
- 125*mp->NSPEC_AB*sizeof(float)),8);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_mu_kl),
- 125*mp->NSPEC_AB*sizeof(float)),8);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_kappa_kl),
- 125*mp->NSPEC_AB*sizeof(float)),8);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_epsilon_trace_over_3),
- 125*mp->NSPEC_AB*sizeof(float)),8);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilon_trace_over_3),
- 125*mp->NSPEC_AB*sizeof(float)),8);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_Sigma_kl),
- 125*(mp->NSPEC_AB)*sizeof(float)),9);
-
- cudaMemcpy(mp->d_rho_kl,rho_kl, 125*NSPEC_ADJOINT*sizeof(float),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_mu_kl,mu_kl, 125*NSPEC_ADJOINT*sizeof(float),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_kappa_kl,kappa_kl, 125*NSPEC_ADJOINT*sizeof(float),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_epsilon_trace_over_3,epsilon_trace_over_3,
- 125*NSPEC_ADJOINT*sizeof(float),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_b_epsilon_trace_over_3 ,b_epsilon_trace_over_3,
- 125*NSPEC_ADJOINT*sizeof(float),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_Sigma_kl, Sigma_kl, 125*(NSPEC_ADJOINT)*sizeof(float),
- cudaMemcpyHostToDevice);
-
- exit_on_cuda_error("prepare_sensitivity_kernels");
-}
-
-
-extern "C" void prepare_adjoint_constants_device_(long* Mesh_pointer_f,
- int* NGLLX,
- int* ispec_selected_rec,
- int* islice_selected_rec,
- int* islice_selected_rec_size,
- int* nrec,
- float* noise_sourcearray,
- int* NSTEP,
- float* epsilondev_xx,
- float* epsilondev_yy,
- float* epsilondev_xy,
- float* epsilondev_xz,
- float* epsilondev_yz,
- int* NSPEC_STRAIN_ONLY
- ) {
- exit_on_cuda_error("prepare_adjoint_constants_device 1");
- Mesh* mp = (Mesh*)(*Mesh_pointer_f);
- int epsilondev_size = 128*(*NSPEC_STRAIN_ONLY);
-
- // already done earlier
- // print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_ispec_selected_rec,
- // *nrec*sizeof(int)),1);
- // cudaMemcpy(mp->d_ispec_selected_rec,ispec_selected_rec, *nrec*sizeof(int),
- // cudaMemcpyHostToDevice);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_islice_selected_rec,
- *islice_selected_rec_size*sizeof(int)),2);
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_noise_sourcearray,
- 3*125*(*NSTEP)*sizeof(float)),2);
-
-
- cudaMemcpy(mp->d_noise_sourcearray, noise_sourcearray,
- 3*125*(*NSTEP)*sizeof(float),
- cudaMemcpyHostToDevice);
-
- exit_on_cuda_error("prepare_adjoint_constants_device 2");
-
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_xx,
- epsilondev_size*sizeof(float)),3);
- cudaMemcpy(mp->d_epsilondev_xx,epsilondev_xx,epsilondev_size*sizeof(float),
- cudaMemcpyHostToDevice);
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_yy,
- epsilondev_size*sizeof(float)),4);
- cudaMemcpy(mp->d_epsilondev_yy,epsilondev_yy,epsilondev_size*sizeof(float),
- cudaMemcpyHostToDevice);
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_xy,
- epsilondev_size*sizeof(float)),5);
- cudaMemcpy(mp->d_epsilondev_xy,epsilondev_xy,epsilondev_size*sizeof(float),
- cudaMemcpyHostToDevice);
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_xz,
- epsilondev_size*sizeof(float)),6);
- cudaMemcpy(mp->d_epsilondev_xz,epsilondev_xz,epsilondev_size*sizeof(float),
- cudaMemcpyHostToDevice);
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_epsilondev_yz,
- epsilondev_size*sizeof(float)),7);
- cudaMemcpy(mp->d_epsilondev_yz,epsilondev_yz,epsilondev_size*sizeof(float),
- cudaMemcpyHostToDevice);
- exit_on_cuda_error("prepare_adjoint_constants_device 3");
-
-
- // these don't seem necessary and crash code for NOISE_TOMOGRAPHY >
- // 0 b/c rho_kl, etc not yet allocated when NT=1
-
-
-}
-
-extern "C" {
- void prepare_fields_device_(long* Mesh_pointer_f, int* size);
- void transfer_fields_to_device_(int* size, float* displ, float* veloc, float* accel,long* Mesh_pointer_f);
- void transfer_fields_from_device_(int* size, float* displ, float* veloc, float* accel,long* Mesh_pointer_f);
-}
-
-void prepare_fields_device_(long* Mesh_pointer_f, int* size) {
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_displ),sizeof(float)*(*size)),0);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_veloc),sizeof(float)*(*size)),1);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_accel),sizeof(float)*(*size)),2);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_send_accel_buffer),sizeof(float)*(*size)),2);
-
-}
-
-
-extern "C" void transfer_b_fields_to_device_(int* size, float* b_displ, float* b_veloc, float* b_accel,
- long* Mesh_pointer_f) {
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
- cudaMemcpy(mp->d_b_displ,b_displ,sizeof(float)*(*size),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_b_veloc,b_veloc,sizeof(float)*(*size),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_b_accel,b_accel,sizeof(float)*(*size),cudaMemcpyHostToDevice);
-
-}
-
-void transfer_fields_to_device_(int* size, float* displ, float* veloc, float* accel,long* Mesh_pointer_f) {
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- print_CUDA_error_if_any(cudaMemcpy(mp->d_displ,displ,sizeof(float)*(*size),cudaMemcpyHostToDevice),3);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_veloc,veloc,sizeof(float)*(*size),cudaMemcpyHostToDevice),4);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_accel,accel,sizeof(float)*(*size),cudaMemcpyHostToDevice),5);
-
-}
-
-extern "C" void transfer_b_fields_from_device_(int* size, float* b_displ, float* b_veloc, float* b_accel,long* Mesh_pointer_f) {
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- cudaMemcpy(b_displ,mp->d_b_displ,sizeof(float)*(*size),cudaMemcpyDeviceToHost);
- cudaMemcpy(b_veloc,mp->d_b_veloc,sizeof(float)*(*size),cudaMemcpyDeviceToHost);
- cudaMemcpy(b_accel,mp->d_b_accel,sizeof(float)*(*size),cudaMemcpyDeviceToHost);
-
-}
-
-extern "C" void get_max_accel_(int* itf,int* sizef,long* Mesh_pointer) {
- Mesh* mp = (Mesh*)(*Mesh_pointer);
- int procid;
- MPI_Comm_rank(MPI_COMM_WORLD,&procid);
- int size = *sizef;
- int it = *itf;
- float* accel_cpy = (float*)malloc(size*sizeof(float));
- cudaMemcpy(accel_cpy,mp->d_accel,size*sizeof(float),cudaMemcpyDeviceToHost);
- float maxval=0;
- for(int i=0;i<size;++i) {
- maxval = MAX(maxval,accel_cpy[i]);
- }
- printf("%d/%d: max=%e\n",it,procid,maxval);
- free(accel_cpy);
-}
-
-extern "C" void transfer_accel_to_device_(int* size, float* accel,long* Mesh_pointer_f) {
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- print_CUDA_error_if_any(cudaMemcpy(mp->d_accel,accel,sizeof(float)*(*size),cudaMemcpyHostToDevice),6);
-
-}
-
-extern "C" void transfer_accel_from_device_(int* size, float* accel,long* Mesh_pointer_f) {
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- print_CUDA_error_if_any(cudaMemcpy(accel,mp->d_accel,sizeof(float)*(*size),cudaMemcpyDeviceToHost),6);
-
-}
-
-extern "C" void transfer_b_accel_from_device_(int* size, float* b_accel,long* Mesh_pointer_f) {
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- print_CUDA_error_if_any(cudaMemcpy(b_accel,mp->d_b_accel,sizeof(float)*(*size),cudaMemcpyDeviceToHost),6);
-
-}
-
-
-extern "C" void transfer_sigma_from_device_(int* size, float* sigma_kl,long* Mesh_pointer_f) {
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- print_CUDA_error_if_any(cudaMemcpy(sigma_kl,mp->d_Sigma_kl,sizeof(float)*(*size),cudaMemcpyDeviceToHost),6);
-
-}
-
-
-
-
-extern "C" void transfer_b_displ_from_device_(int* size, float* displ,long* Mesh_pointer_f) {
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- print_CUDA_error_if_any(cudaMemcpy(displ,mp->d_displ,sizeof(float)*(*size),cudaMemcpyDeviceToHost),6);
-
-}
-
-extern "C" void transfer_displ_from_device_(int* size, float* displ,long* Mesh_pointer_f) {
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- print_CUDA_error_if_any(cudaMemcpy(displ,mp->d_displ,sizeof(float)*(*size),cudaMemcpyDeviceToHost),6);
-
-}
-
-extern "C" void transfer_compute_kernel_answers_from_device_(long* Mesh_pointer,
- float* rho_kl,int* size_rho,
- float* mu_kl, int* size_mu,
- float* kappa_kl, int* size_kappa) {
- Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
- cudaMemcpy(rho_kl,mp->d_rho_kl,*size_rho*sizeof(float),cudaMemcpyDeviceToHost);
- cudaMemcpy(mu_kl,mp->d_mu_kl,*size_mu*sizeof(float),cudaMemcpyDeviceToHost);
- cudaMemcpy(kappa_kl,mp->d_kappa_kl,*size_kappa*sizeof(float),cudaMemcpyDeviceToHost);
-
-}
-
-extern "C" void transfer_compute_kernel_fields_from_device_(long* Mesh_pointer,
- float* accel, int* size_accel,
- float* b_displ, int* size_b_displ,
- float* epsilondev_xx,
- float* epsilondev_yy,
- float* epsilondev_xy,
- float* epsilondev_xz,
- float* epsilondev_yz,
- int* size_epsilondev,
- float* b_epsilondev_xx,
- float* b_epsilondev_yy,
- float* b_epsilondev_xy,
- float* b_epsilondev_xz,
- float* b_epsilondev_yz,
- int* size_b_epsilondev,
- float* rho_kl,int* size_rho,
- float* mu_kl, int* size_mu,
- float* kappa_kl, int* size_kappa,
- float* epsilon_trace_over_3,
- float* b_epsilon_trace_over_3,
- int* size_epsilon_trace_over_3) {
- Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
- cudaMemcpy(accel,mp->d_accel,*size_accel*sizeof(float),cudaMemcpyDeviceToHost);
- cudaMemcpy(b_displ,mp->d_b_displ,*size_b_displ*sizeof(float),cudaMemcpyDeviceToHost);
- cudaMemcpy(epsilondev_xx,mp->d_epsilondev_xx,*size_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
- cudaMemcpy(epsilondev_yy,mp->d_epsilondev_yy,*size_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
- cudaMemcpy(epsilondev_xy,mp->d_epsilondev_xy,*size_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
- cudaMemcpy(epsilondev_xz,mp->d_epsilondev_xz,*size_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
- cudaMemcpy(epsilondev_yz,mp->d_epsilondev_yz,*size_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
- cudaMemcpy(b_epsilondev_xx,mp->d_b_epsilondev_xx,*size_b_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
- cudaMemcpy(b_epsilondev_yy,mp->d_b_epsilondev_yy,*size_b_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
- cudaMemcpy(b_epsilondev_xy,mp->d_b_epsilondev_xy,*size_b_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
- cudaMemcpy(b_epsilondev_xz,mp->d_b_epsilondev_xz,*size_b_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
- cudaMemcpy(b_epsilondev_yz,mp->d_b_epsilondev_yz,*size_b_epsilondev*sizeof(float),cudaMemcpyDeviceToHost);
- cudaMemcpy(rho_kl,mp->d_rho_kl,*size_rho*sizeof(float),cudaMemcpyDeviceToHost);
- cudaMemcpy(mu_kl,mp->d_mu_kl,*size_mu*sizeof(float),cudaMemcpyDeviceToHost);
- cudaMemcpy(kappa_kl,mp->d_kappa_kl,*size_kappa*sizeof(float),cudaMemcpyDeviceToHost);
- cudaMemcpy(epsilon_trace_over_3,mp->d_epsilon_trace_over_3,*size_epsilon_trace_over_3*sizeof(float),
- cudaMemcpyDeviceToHost);
- cudaMemcpy(b_epsilon_trace_over_3,mp->d_b_epsilon_trace_over_3,*size_epsilon_trace_over_3*sizeof(float),
- cudaMemcpyDeviceToHost);
- exit_on_cuda_error("after transfer_compute_kernel_fields_from_device");
-}
-
-
-void transfer_fields_from_device_(int* size, float* displ, float* veloc, float* accel,long* Mesh_pointer_f) {
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- print_CUDA_error_if_any(cudaMemcpy(displ,mp->d_displ,sizeof(float)*(*size),cudaMemcpyDeviceToHost),6);
- print_CUDA_error_if_any(cudaMemcpy(veloc,mp->d_veloc,sizeof(float)*(*size),cudaMemcpyDeviceToHost),7);
- print_CUDA_error_if_any(cudaMemcpy(accel,mp->d_accel,sizeof(float)*(*size),cudaMemcpyDeviceToHost),8);
-
- // printf("Transfered Fields From Device\n");
- // int procid;
- // MPI_Comm_rank(MPI_COMM_WORLD,&procid);
- // printf("Quick check of answer for p:%d in transfer_fields_from_device\n",procid);
- // for(int i=0;i<5;i++) {
- // printf("accel[%d]=%2.20e\n",i,accel[i]);
- // }
-
-}
-
-
-extern "C" void check_max_norm_displ_gpu_(int* size, float* displ,long* Mesh_pointer_f,int* announceID) {
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- cudaMemcpy(displ, mp->d_displ,*size*sizeof(float),cudaMemcpyDeviceToHost);
- float maxnorm=0;
-
- for(int i=0;i<*size;i++) {
- maxnorm = MAX(maxnorm,fabsf(displ[i]));
- }
- printf("%d: maxnorm of forward displ = %e\n",*announceID,maxnorm);
-}
-
-extern "C" void check_max_norm_vector_(int* size, float* vector1, int* announceID) {
- int procid;
- MPI_Comm_rank(MPI_COMM_WORLD,&procid);
- float maxnorm=0;
- int maxloc;
- for(int i=0;i<*size;i++) {
- if(maxnorm<fabsf(vector1[i])) {
- maxnorm = vector1[i];
- maxloc = i;
- }
- }
- printf("%d:maxnorm of vector %d [%d] = %e\n",procid,*announceID,maxloc,maxnorm);
-}
-
-extern "C" void check_max_norm_displ_(int* size, float* displ, int* announceID) {
- float maxnorm=0;
-
- for(int i=0;i<*size;i++) {
- maxnorm = MAX(maxnorm,fabsf(displ[i]));
- }
- printf("%d: maxnorm of forward displ = %e\n",*announceID,maxnorm);
-}
-
-
-extern "C" void check_max_norm_b_displ_gpu_(int* size, float* b_displ,long* Mesh_pointer_f,int* announceID) {
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- float* b_accel = (float*)malloc(*size*sizeof(float));
-
- cudaMemcpy(b_displ, mp->d_b_displ,*size*sizeof(float),cudaMemcpyDeviceToHost);
- cudaMemcpy(b_accel, mp->d_b_accel,*size*sizeof(float),cudaMemcpyDeviceToHost);
-
- float maxnorm=0;
- float maxnorm_accel=0;
-
- for(int i=0;i<*size;i++) {
- maxnorm = MAX(maxnorm,fabsf(b_displ[i]));
- maxnorm_accel = MAX(maxnorm,fabsf(b_accel[i]));
- }
- free(b_accel);
- printf("%d: maxnorm of backward displ = %e\n",*announceID,maxnorm);
- printf("%d: maxnorm of backward accel = %e\n",*announceID,maxnorm_accel);
-}
-
-
-extern "C" void check_max_norm_b_accel_gpu_(int* size, float* b_accel,long* Mesh_pointer_f,int* announceID) {
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- cudaMemcpy(b_accel, mp->d_b_accel,*size*sizeof(float),cudaMemcpyDeviceToHost);
-
- float maxnorm=0;
-
- for(int i=0;i<*size;i++) {
- maxnorm = MAX(maxnorm,fabsf(b_accel[i]));
- }
- printf("%d: maxnorm of backward accel = %e\n",*announceID,maxnorm);
-}
-
-extern "C" void check_max_norm_b_veloc_gpu_(int* size, float* b_veloc,long* Mesh_pointer_f,int* announceID) {
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- cudaMemcpy(b_veloc, mp->d_b_veloc,*size*sizeof(float),cudaMemcpyDeviceToHost);
-
- float maxnorm=0;
-
- for(int i=0;i<*size;i++) {
- maxnorm = MAX(maxnorm,fabsf(b_veloc[i]));
- }
- printf("%d: maxnorm of backward veloc = %e\n",*announceID,maxnorm);
-}
-
-extern "C" void check_max_norm_b_displ_(int* size, float* b_displ,int* announceID) {
-
- float maxnorm=0;
-
- for(int i=0;i<*size;i++) {
- maxnorm = MAX(maxnorm,fabsf(b_displ[i]));
- }
- printf("%d:maxnorm of backward displ = %e\n",*announceID,maxnorm);
-}
-
-
-extern "C" void check_max_norm_b_accel_(int* size, float* b_accel,int* announceID) {
-
- float maxnorm=0;
-
- for(int i=0;i<*size;i++) {
- maxnorm = MAX(maxnorm,fabsf(b_accel[i]));
- }
- printf("%d:maxnorm of backward accel = %e\n",*announceID,maxnorm);
-}
-
-extern "C" void check_error_vectors_(int* sizef, float* vector1,float* vector2) {
-
- int size = *sizef;
-
- double diff2 = 0;
- double sum = 0;
- double temp;
- double maxerr=0;
- int maxerrorloc;
-
- for(int i=0;i<size;++i) {
- temp = vector1[i]-vector2[i];
- diff2 += temp*temp;
- sum += vector1[i]*vector1[i];
- if(maxerr < fabsf(temp)) {
- maxerr = abs(temp);
- maxerrorloc = i;
- }
- }
-
- printf("rel error = %f, maxerr = %e @ %d\n",diff2/sum,maxerr,maxerrorloc);
- int myrank;
- MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
- if(myrank==0) {
- for(int i=maxerrorloc;i>maxerrorloc-5;i--) {
- printf("[%d]: %e vs. %e\n",i,vector1[i],vector2[i]);
- }
- }
-
-}
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90 2011-10-06 03:31:24 UTC (rev 19027)
@@ -205,6 +205,9 @@
! prepares noise simulations
call prepare_timerun_noise()
+ ! prepares GPU arrays
+ if(GPU_MODE) call prepare_timerun_GPU()
+
end subroutine prepare_timerun
!
@@ -704,46 +707,85 @@
endif
end subroutine prepare_timerun_noise
-
- subroutine prepare_GPU()
- use specfem_par
- use specfem_par_acoustic
- use specfem_par_elastic
- use specfem_par_poroelastic
- use specfem_par_movie
+!
+!-------------------------------------------------------------------------------------------------
+!
- implicit none
- character(len=256) :: plot_file
- integer :: ier
+ subroutine prepare_timerun_GPU()
+ use specfem_par
+ use specfem_par_acoustic
+ use specfem_par_elastic
+ use specfem_par_poroelastic
+ use specfem_par_movie
+
+ implicit none
+ real :: free_mb,used_mb,total_mb
+
+ ! GPU_MODE now defined in Par_file
+ if(myrank == 0 ) then
+ write(IMAIN,*)
write(IMAIN,*) "GPU_MODE Active. Preparing Fields and Constants on Device."
- call prepare_constants_device(Mesh_pointer, NGLLX, NSPEC_AB, NGLOB_AB, &
- xix, xiy, xiz, etax,etay,etaz, gammax, gammay, gammaz,&
- kappastore, mustore, ibool, phase_ispec_inner_elastic, num_phase_ispec_elastic,&
- rmass, num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh,&
- nibool_interfaces_ext_mesh, ibool_interfaces_ext_mesh,&
- hprime_xx, hprimewgll_xx,&
- wgllwgll_xy, wgllwgll_xz,&
- wgllwgll_yz,&
- abs_boundary_ispec, abs_boundary_ijk,&
- abs_boundary_normal,&
- rho_vp,rho_vs,&
- abs_boundary_jacobian2Dw,&
- b_absorb_field, num_abs_boundary_faces, b_num_abs_boundary_faces,&
- ispec_is_inner, ispec_is_elastic,&
- NSOURCES, sourcearrays, islice_selected_source, ispec_selected_source,&
- number_receiver_global,ispec_selected_rec,nrec_local,nrec)
+ write(IMAIN,*)
+ endif
+
+ ! prepares general fields on GPU
+ call prepare_constants_device(Mesh_pointer, &
+ NGLLX, NSPEC_AB, NGLOB_AB, &
+ xix, xiy, xiz, etax,etay,etaz, gammax, gammay, gammaz, &
+ kappastore, mustore,ibool, &
+ num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh, &
+ nibool_interfaces_ext_mesh, ibool_interfaces_ext_mesh, &
+ hprime_xx, hprime_yy, hprime_zz, &
+ hprimewgll_xx, wgllwgll_xy, wgllwgll_xz, wgllwgll_yz, &
+ abs_boundary_ispec, abs_boundary_ijk, &
+ abs_boundary_normal, &
+ abs_boundary_jacobian2Dw, &
+ b_absorb_field, num_abs_boundary_faces, b_num_abs_boundary_faces, &
+ ispec_is_inner, &
+ NSOURCES, sourcearrays, islice_selected_source, ispec_selected_source, &
+ number_receiver_global, ispec_selected_rec, nrec, nrec_local, &
+ SIMULATION_TYPE)
- call prepare_fields_device(Mesh_pointer, NDIM*NGLOB_AB);
+! call prepare_fields_device(Mesh_pointer, NDIM*NGLOB_AB);
- if ( NOISE_TOMOGRAPHY > 0 ) then
+ ! prepares fields on GPU for acoustic simulations
+ if( ACOUSTIC_SIMULATION ) &
+ call prepare_fields_acoustic_device(Mesh_pointer,rmass_acoustic,rhostore,kappastore, &
+ num_phase_ispec_acoustic,phase_ispec_inner_acoustic, &
+ ispec_is_acoustic, &
+ num_free_surface_faces,free_surface_ispec,free_surface_ijk, &
+ ABSORBING_CONDITIONS,b_reclen_potential,b_absorb_potential, &
+ SIMULATION_TYPE,rho_ac_kl,kappa_ac_kl)
+
+ ! prepares fields on GPU for elastic simulations
+ if( ELASTIC_SIMULATION ) &
+ call prepare_fields_elastic_device(Mesh_pointer, NDIM*NGLOB_AB, &
+ rmass,rho_vp,rho_vs, &
+ num_phase_ispec_elastic,phase_ispec_inner_elastic, &
+ ispec_is_elastic, &
+ ABSORBING_CONDITIONS,b_absorb_field,b_num_abs_boundary_faces)
- call prepare_noise_constants_device(Mesh_pointer, NGLLX, NSPEC_AB, NGLOB_AB, &
- free_surface_ispec,free_surface_ijk, num_free_surface_faces,size(free_surface_ijk), SIMULATION_TYPE)
+ ! prepares receiver arrays for adjoint runs
+ if( SIMULATION_TYPE == 2 .or. SIMULATION_TYPE == 3 ) then
+ call prepare_adjoint_sim2_or_3_constants_device(Mesh_pointer, &
+ islice_selected_rec, &
+ size(islice_selected_rec))
+
+ endif
- call prepare_adjoint_constants_device(Mesh_pointer, NGLLX,&
- ispec_selected_rec,islice_selected_rec,nrec,size(islice_selected_rec),&
+ ! prepares fields on GPU for noise simulations
+ if ( NOISE_TOMOGRAPHY > 0 ) then
+ ! note: noise tomography is only supported for elastic domains so far.
+
+ ! copies noise (free) surface arrays to GPU
+ call prepare_noise_constants_device(Mesh_pointer, NGLLX, NSPEC_AB, NGLOB_AB, &
+ free_surface_ispec,free_surface_ijk,num_free_surface_faces,size(free_surface_ijk), &
+ SIMULATION_TYPE)
+
+ call prepare_adjoint_constants_device(Mesh_pointer, &
+ !ispec_selected_rec,islice_selected_rec,nrec,size(islice_selected_rec),&
noise_sourcearray, NSTEP,&
epsilondev_xx,&
epsilondev_yy,&
@@ -752,31 +794,61 @@
epsilondev_yz,&
NSPEC_STRAIN_ONLY)
- if(NOISE_TOMOGRAPHY > 1) &
- call prepare_and_transfer_noise_backward_constants(Mesh_pointer,&
+ if(NOISE_TOMOGRAPHY > 1) &
+ call prepare_and_transfer_noise_backward_constants(Mesh_pointer,&
normal_x_noise,&
normal_y_noise,&
normal_z_noise,&
mask_noise,&
free_surface_jacobian2Dw,&
nfaces_surface_ext_mesh)
- if( SIMULATION_TYPE == 3) then ! now have backward fields in addition to standard fields
-
- call prepare_and_transfer_noise_backward_fields(Mesh_pointer, NDIM*NGLOB_AB, &
+
+ if( SIMULATION_TYPE == 3) then
+ ! now have backward fields in addition to standard fields
+ call prepare_and_transfer_noise_backward_fields(Mesh_pointer, NDIM*NGLOB_AB, &
b_displ, b_veloc, b_accel,&
b_epsilondev_xx, b_epsilondev_yy, b_epsilondev_xy,&
- b_epsilondev_xz, b_epsilondev_yz,NSPEC_STRAIN_ONLY)
- call prepare_sensitivity_kernels(Mesh_pointer,&
+ b_epsilondev_xz, b_epsilondev_yz, &
+ NSPEC_STRAIN_ONLY)
+
+ call prepare_sensitivity_kernels(Mesh_pointer,&
rho_kl,mu_kl,kappa_kl,&
epsilon_trace_over_3,b_epsilon_trace_over_3,&
Sigma_kl,NSPEC_AB)
- endif
- end if
+ endif
+ endif ! NOISE_TOMOGRAPHY
- ! transfer forward and backward fields to device with initial values
- call transfer_fields_to_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
- if(SIMULATION_TYPE == 3) call transfer_b_fields_to_device(NDIM*NGLOB_AB,b_displ,b_veloc, b_accel,Mesh_pointer)
-
- end subroutine prepare_GPU
+ ! sends initial data to device
+
+ ! puts acoustic initial fields onto GPU
+ if( ACOUSTIC_SIMULATION ) then
+ call transfer_fields_acoustic_to_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic,potential_dot_dot_acoustic,Mesh_pointer)
+ if( SIMULATION_TYPE == 3 ) &
+ call transfer_b_fields_acoustic_to_device(NGLOB_AB,b_potential_acoustic, &
+ b_potential_dot_acoustic,b_potential_dot_dot_acoustic,Mesh_pointer)
+ endif
+
+ ! puts elastic initial fields onto GPU
+ if( ELASTIC_SIMULATION ) then
+ ! transfer forward and backward fields to device with initial values
+ call transfer_fields_to_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
+ if(SIMULATION_TYPE == 3) &
+ call transfer_b_fields_to_device(NDIM*NGLOB_AB,b_displ,b_veloc, b_accel,Mesh_pointer)
+ endif
+
+ ! outputs GPU usage to files for all processes
+ call output_free_device_memory(myrank)
+
+ ! outputs usage
+ if( myrank == 0 ) then
+ call get_free_device_memory(free_mb,used_mb,total_mb)
+ write(IMAIN,*)" GPU usage: free =",free_mb," MB"
+ write(IMAIN,*)" used =",used_mb," MB"
+ write(IMAIN,*)" total =",total_mb," MB"
+ write(IMAIN,*)
+ endif
+
+ end subroutine prepare_timerun_GPU
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90 2011-10-06 03:31:24 UTC (rev 19027)
@@ -514,6 +514,7 @@
real(kind=CUSTOM_REAL) :: junk
integer :: isource,ispec
integer :: irec !,irec_local
+ integer :: i,j,k,iglob
integer :: icomp,itime,nadj_files_found,nadj_files_found_tot,ier
character(len=3),dimension(NDIM) :: comp ! = (/ "BHE", "BHN", "BHZ" /)
character(len=256) :: filename
@@ -562,6 +563,34 @@
sourcearray,xigll,yigll,zigll,factor_source)
endif
+ ! point forces, initializes sourcearray, used for simplified CUDA routines
+ if(USE_FORCE_POINT_SOURCE) then
+ ! note: for use_force_point_source xi/eta/gamma are in the range [1,NGLL*]
+ iglob = ibool(nint(xi_source(isource)), &
+ nint(eta_source(isource)), &
+ nint(gamma_source(isource)), &
+ ispec)
+ ! sets sourcearrays
+ sourcearray(:,:,:,:) = 0.0
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+ if( ibool(i,j,k,ispec) == iglob ) then
+ ! acoustic source
+ ! identical source array components in x,y,z-direction
+ if( ispec_is_acoustic(ispec) ) then
+ sourcearray(:,i,j,k) = 1.0
+ endif
+ ! elastic source
+ if( ispec_is_elastic(ispec) ) then
+ sourcearray(:,i,j,k) = nu_source(COMPONENT_FORCE_SOURCE,:,isource)
+ endif
+ endif
+ enddo
+ enddo
+ enddo
+ endif
+
! stores source excitations
sourcearrays(isource,:,:,:,:) = sourcearray(:,:,:,:)
@@ -746,9 +775,18 @@
double precision :: xil,etal,gammal
double precision :: xmesh,ymesh,zmesh
real(kind=CUSTOM_REAL),dimension(NGNOD) :: xelm,yelm,zelm
- integer :: ia,ispec,isource,irec,ier
- character(len=256) :: filename,filename_new,system_command
+ integer :: ia,ispec,isource,irec,ier,totalpoints
+ INTEGER(kind=4) :: system_command_status
+ character(len=256) :: filename,filename_new,system_command,system_command1,system_command2
+ ! determines number of points for vtk file
+ if( SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) then
+ totalpoints = NSOURCES + nrec
+ else
+ ! pure adjoint simulation only needs receivers
+ totalpoints = nrec
+ endif
+
if (myrank == 0) then
! vtk file
open(IOVTK,file=trim(OUTPUT_FILES)//'/sr.vtk',status='unknown',iostat=ier)
@@ -758,64 +796,66 @@
write(IOVTK,'(a)') 'Source and Receiver VTK file'
write(IOVTK,'(a)') 'ASCII'
write(IOVTK,'(a)') 'DATASET POLYDATA'
- write(IOVTK, '(a,i6,a)') 'POINTS ', NSOURCES+nrec, ' float'
+ write(IOVTK, '(a,i6,a)') 'POINTS ', totalpoints, ' float'
endif
! sources
- do isource=1,NSOURCES
- ! spectral element id
- ispec = ispec_selected_source(isource)
+ if( SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) then
+ do isource=1,NSOURCES
+ ! spectral element id
+ ispec = ispec_selected_source(isource)
- ! gets element ancor nodes
- if( myrank == islice_selected_source(isource) ) then
- ! find the coordinates of the eight corner nodes of the element
- call get_shape3D_element_corners(xelm,yelm,zelm,ispec,&
- ibool,xstore,ystore,zstore,NSPEC_AB,NGLOB_AB)
+ ! gets element ancor nodes
+ if( myrank == islice_selected_source(isource) ) then
+ ! find the coordinates of the eight corner nodes of the element
+ call get_shape3D_element_corners(xelm,yelm,zelm,ispec,&
+ ibool,xstore,ystore,zstore,NSPEC_AB,NGLOB_AB)
- endif
- ! master collects corner locations
- if( islice_selected_source(isource) /= 0 ) then
- if( myrank == 0 ) then
- call recvv_cr(xelm,NGNOD,islice_selected_source(isource),0)
- call recvv_cr(yelm,NGNOD,islice_selected_source(isource),0)
- call recvv_cr(zelm,NGNOD,islice_selected_source(isource),0)
- else if( myrank == islice_selected_source(isource) ) then
- call sendv_cr(xelm,NGNOD,0,0)
- call sendv_cr(yelm,NGNOD,0,0)
- call sendv_cr(zelm,NGNOD,0,0)
endif
- endif
-
- if( myrank == 0 ) then
- ! get the 3-D shape functions
- if( USE_FORCE_POINT_SOURCE ) then
- ! note: we switch xi,eta,gamma range to be [-1,1]
- ! uses initial guess in xi, eta and gamma
- xil = xigll(nint(xi_source(isource)))
- etal = yigll(nint(eta_source(isource)))
- gammal = zigll(nint(gamma_source(isource)))
- else
- xil = xi_source(isource)
- etal = eta_source(isource)
- gammal = gamma_source(isource)
+ ! master collects corner locations
+ if( islice_selected_source(isource) /= 0 ) then
+ if( myrank == 0 ) then
+ call recvv_cr(xelm,NGNOD,islice_selected_source(isource),0)
+ call recvv_cr(yelm,NGNOD,islice_selected_source(isource),0)
+ call recvv_cr(zelm,NGNOD,islice_selected_source(isource),0)
+ else if( myrank == islice_selected_source(isource) ) then
+ call sendv_cr(xelm,NGNOD,0,0)
+ call sendv_cr(yelm,NGNOD,0,0)
+ call sendv_cr(zelm,NGNOD,0,0)
+ endif
endif
- call get_shape3D_single(myrank,shape3D,xil,etal,gammal)
- ! interpolates source locations
- xmesh = 0.0
- ymesh = 0.0
- zmesh = 0.0
- do ia=1,NGNOD
- xmesh = xmesh + shape3D(ia)*xelm(ia)
- ymesh = ymesh + shape3D(ia)*yelm(ia)
- zmesh = zmesh + shape3D(ia)*zelm(ia)
- enddo
+ if( myrank == 0 ) then
+ ! get the 3-D shape functions
+ if( USE_FORCE_POINT_SOURCE ) then
+ ! note: we switch xi,eta,gamma range to be [-1,1]
+ ! uses initial guess in xi, eta and gamma
+ xil = xigll(nint(xi_source(isource)))
+ etal = yigll(nint(eta_source(isource)))
+ gammal = zigll(nint(gamma_source(isource)))
+ else
+ xil = xi_source(isource)
+ etal = eta_source(isource)
+ gammal = gamma_source(isource)
+ endif
+ call get_shape3D_single(myrank,shape3D,xil,etal,gammal)
- ! writes out to VTK file
- write(IOVTK,*) xmesh,ymesh,zmesh
- endif
- enddo ! NSOURCES
+ ! interpolates source locations
+ xmesh = 0.0
+ ymesh = 0.0
+ zmesh = 0.0
+ do ia=1,NGNOD
+ xmesh = xmesh + shape3D(ia)*xelm(ia)
+ ymesh = ymesh + shape3D(ia)*yelm(ia)
+ zmesh = zmesh + shape3D(ia)*zelm(ia)
+ enddo
+ ! writes out to VTK file
+ write(IOVTK,*) xmesh,ymesh,zmesh
+ endif
+ enddo ! NSOURCES
+ endif
+
! receivers
do irec=1,nrec
ispec = ispec_selected_rec(irec)
@@ -840,15 +880,9 @@
if( myrank == 0 ) then
! get the 3-D shape functions
- if (SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) then
- xil = xi_receiver(irec)
- etal = eta_receiver(irec)
- gammal = gamma_receiver(irec)
- else
- xil = xi_source(irec)
- etal = eta_source(irec)
- gammal = gamma_source(irec)
- endif
+ xil = xi_receiver(irec)
+ etal = eta_receiver(irec)
+ gammal = gamma_receiver(irec)
call get_shape3D_single(myrank,shape3D,xil,etal,gammal)
! interpolates receiver locations
@@ -872,24 +906,38 @@
close(IOVTK)
! creates additional receiver and source files
+ if( SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) then
! extracts receiver locations
filename = trim(OUTPUT_FILES)//'/sr.vtk'
filename_new = trim(OUTPUT_FILES)//'/receiver.vtk'
write(system_command, &
- "('awk ',a1,'{if(NR<5) print $0;if(NR==6)print ',a1,'POINTS',i6,' float',a1,';if(NR>5+',i6,')print $0}',a1,' < ',a,' > ',a)")&
+ "('awk ',a1,'{if(NR<5) print $0;if(NR==5)print ',a1,'POINTS',i6,' float',a1,';if(NR>5+',i6,')print $0}',a1,' < ',a,' > ',a)")&
"'",'"',nrec,'"',NSOURCES,"'",trim(filename),trim(filename_new)
- call system(system_command)
+ call system(system_command,system_command_status)
- ! extracts source locations
- filename_new = trim(OUTPUT_FILES)//'/source.vtk'
- write(system_command, &
- "('awk ',a1,'{if(NR< 6 + ',i6,') print $0}END{print}',a1,' < ',a,' > ',a)")&
- "'",NSOURCES,"'",trim(filename),trim(filename_new)
- call system(system_command)
+ ! extracts source locations
+ !"('awk ',a1,'{if(NR< 6 + ',i6,') print $0}END{print}',a1,' < ',a,' > ',a)")&
+ filename_new = trim(OUTPUT_FILES)//'/source.vtk'
+
+ write(system_command1, &
+ "('awk ',a1,'{if(NR<5) print $0;if(NR==5)print ',a1,'POINTS',i6,' float',a1,';')") &
+ "'",'"',NSOURCES,'"'
+ !daniel
+ !print*,'command 1:',trim(system_command1)
- endif
+ write(system_command2, &
+ "('if(NR>5 && NR <6+',i6,')print $0}END{print ',a,'}',a1,' < ',a,' > ',a)") &
+ NSOURCES,'" "',"'",trim(filename),trim(filename_new)
+
+ !print*,'command 2:',trim(system_command2)
+
+ system_command = trim(system_command1)//trim(system_command2)
+ !print*,'command:',trim(system_command)
+ call system(trim(system_command),system_command_status)
+ endif
+ endif
end subroutine setup_sources_receivers_VTKfile
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D.f90 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D.f90 2011-10-06 03:31:24 UTC (rev 19027)
@@ -327,8 +327,6 @@
! sets up and precomputes simulation arrays
call prepare_timerun()
-! prepare and transfer GPU arrays
- if(GPU_MODE) call prepare_GPU()
! steps through time iterations
call iterate_time()
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_movie_output.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_movie_output.f90 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_movie_output.f90 2011-10-06 03:31:24 UTC (rev 19027)
@@ -30,8 +30,32 @@
use specfem_par
use specfem_par_movie
+ use specfem_par_elastic
+ use specfem_par_acoustic
implicit none
+ ! gets resulting array values onto CPU
+ if(GPU_MODE .and. &
+ ( &
+ EXTERNAL_MESH_CREATE_SHAKEMAP .or. &
+ CREATE_SHAKEMAP .or. &
+ ( MOVIE_SURFACE .and. mod(it,NTSTEP_BETWEEN_FRAMES) == 0) .or. &
+ ( MOVIE_VOLUME .and. mod(it,NTSTEP_BETWEEN_FRAMES) == 0) .or. &
+ ( PNM_GIF_IMAGE .and. mod(it,NTSTEP_BETWEEN_FRAMES) == 0) &
+ ) ) then
+ ! acoustic domains
+ if( ACOUSTIC_SIMULATION ) then
+ ! transfers whole fields
+ call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
+ potential_dot_acoustic,potential_dot_dot_acoustic,Mesh_pointer)
+ endif
+ ! elastic domains
+ if( ELASTIC_SIMULATION ) then
+ ! transfers whole fields
+ call transfer_fields_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
+ endif
+ endif
+
! shakemap creation
if (EXTERNAL_MESH_CREATE_SHAKEMAP) then
call wmo_create_shakemap_em()
@@ -1034,6 +1058,9 @@
character(len=3) :: channel
character(len=1) :: compx,compy,compz
+ real(kind=CUSTOM_REAL),dimension(:,:,:,:),allocatable:: tmpdata
+ integer :: i,j,k,iglob
+
! gets component characters: X/Y/Z or E/N/Z
call write_channel_name(1,channel)
compx(1:1) = channel(3:3) ! either X or E
@@ -1160,6 +1187,57 @@
!write(27) velocity_movie
!close(27)
+ ! norms
+ allocate( tmpdata(NGLLX,NGLLY,NGLLZ,NSPEC_AB),stat=ier)
+ if( ier /= 0 ) stop 'error allocating tmpdata arrays for movie output'
+
+ if( ELASTIC_SIMULATION ) then
+ ! norm of displacement
+ do ispec=1,NSPEC_AB
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+ iglob = ibool(i,j,k,ispec)
+ tmpdata(i,j,k,ispec) = sqrt( displ(1,iglob)**2 + displ(2,iglob)**2 + displ(3,iglob)**2 )
+ enddo
+ enddo
+ enddo
+ enddo
+ write(outputname,"('/proc',i6.6,'_displ_norm_it',i6.6,'.bin')") myrank,it
+ open(unit=27,file=trim(LOCAL_PATH)//trim(outputname),status='unknown',form='unformatted',iostat=ier)
+ if( ier /= 0 ) stop 'error opening file movie output velocity z'
+ write(27) tmpdata
+ close(27)
+
+ ! norm of acceleration
+ do ispec=1,NSPEC_AB
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+ iglob = ibool(i,j,k,ispec)
+ tmpdata(i,j,k,ispec) = sqrt( accel(1,iglob)**2 + accel(2,iglob)**2 + accel(3,iglob)**2 )
+ enddo
+ enddo
+ enddo
+ enddo
+ write(outputname,"('/proc',i6.6,'_accel_norm_it',i6.6,'.bin')") myrank,it
+ open(unit=27,file=trim(LOCAL_PATH)//trim(outputname),status='unknown',form='unformatted',iostat=ier)
+ if( ier /= 0 ) stop 'error opening file movie output velocity z'
+ write(27) tmpdata
+ close(27)
+ endif
+
+ ! norm of velocity
+ tmpdata = sqrt( velocity_x**2 + velocity_y**2 + velocity_z**2)
+
+ write(outputname,"('/proc',i6.6,'_velocity_norm_it',i6.6,'.bin')") myrank,it
+ open(unit=27,file=trim(LOCAL_PATH)//trim(outputname),status='unknown',form='unformatted',iostat=ier)
+ if( ier /= 0 ) stop 'error opening file movie output velocity z'
+ write(27) tmpdata
+ close(27)
+
+ deallocate(tmpdata)
+
endif
end subroutine wmo_movie_volume_output
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_seismograms.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_seismograms.f90 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_seismograms.f90 2011-10-06 03:31:24 UTC (rev 19027)
@@ -1,863 +1,888 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 0
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-! April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-
- subroutine write_seismograms()
-
-! writes the seismograms with time shift
-
- use specfem_par
- use specfem_par_acoustic
- use specfem_par_elastic
- use specfem_par_poroelastic
- implicit none
- ! local parameters
- real(kind=CUSTOM_REAL),dimension(NDIM,NGLLX,NGLLY,NGLLZ):: displ_element,veloc_element
- double precision :: dxd,dyd,dzd,vxd,vyd,vzd,axd,ayd,azd
- integer :: irec_local,irec
- integer :: iglob,ispec,i,j,k
- ! adjoint locals
- real(kind=CUSTOM_REAL),dimension(NDIM,NDIM):: eps_s
- real(kind=CUSTOM_REAL),dimension(NDIM):: eps_m_s
- real(kind=CUSTOM_REAL):: stf_deltat
- double precision :: stf
-
- ! this transfers fields only in elements with stations for efficiency
- if(GPU_MODE) call transfer_station_fields_from_device(displ,veloc,accel,b_displ,b_veloc,b_accel,&
- Mesh_pointer,number_receiver_global, ispec_selected_rec,ispec_selected_source,ibool,SIMULATION_TYPE)
-
- ! DEBUG: this transfers all elements in the fields, which is inefficient but guaranteed correct
- ! if(GPU_MODE) call transfer_fields_from_device(size(accel),displ,veloc,accel,Mesh_pointer)
-
- do irec_local = 1,nrec_local
-
- ! gets global number of that receiver
- irec = number_receiver_global(irec_local)
-
- ! gets local receiver interpolators
- ! (1-D Lagrange interpolators)
- hxir(:) = hxir_store(irec_local,:)
- hetar(:) = hetar_store(irec_local,:)
- hgammar(:) = hgammar_store(irec_local,:)
-
- ! forward simulations
- if (SIMULATION_TYPE == 1) then
-
- ! receiver's spectral element
- ispec = ispec_selected_rec(irec)
-
- ! elastic wave field
- if( ispec_is_elastic(ispec) ) then
- ! interpolates displ/veloc/accel at receiver locations
- call compute_interpolated_dva(displ,veloc,accel,NGLOB_AB, &
- ispec,NSPEC_AB,ibool, &
- xi_receiver(irec),eta_receiver(irec),gamma_receiver(irec), &
- hxir,hetar,hgammar, &
- dxd,dyd,dzd,vxd,vyd,vzd,axd,ayd,azd)
- endif !elastic
-
- ! acoustic wave field
- if( ispec_is_acoustic(ispec) ) then
- ! displacement vector
- call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
- potential_acoustic, displ_element,&
- hprime_xx,hprime_yy,hprime_zz, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- ibool,rhostore)
- ! velocity vector
- call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
- potential_dot_acoustic, veloc_element,&
- hprime_xx,hprime_yy,hprime_zz, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- ibool,rhostore)
-
- ! interpolates displ/veloc/pressure at receiver locations
- call compute_interpolated_dva_ac(displ_element,veloc_element,&
- potential_dot_dot_acoustic,potential_dot_acoustic,&
- potential_acoustic,NGLOB_AB, &
- ispec,NSPEC_AB,ibool, &
- xi_receiver(irec),eta_receiver(irec),gamma_receiver(irec), &
- hxir,hetar,hgammar, &
- dxd,dyd,dzd,vxd,vyd,vzd,axd,ayd,azd)
- endif ! acoustic
-
- !adjoint simulations
- else if (SIMULATION_TYPE == 2) then
-
- ! adjoint source is placed at receiver
- ispec = ispec_selected_source(irec)
-
- ! elastic wave field
- if( ispec_is_elastic(ispec) ) then
- ! interpolates displ/veloc/accel at receiver locations
- call compute_interpolated_dva(displ,veloc,accel,NGLOB_AB, &
- ispec,NSPEC_AB,ibool, &
- xi_receiver(irec),eta_receiver(irec),gamma_receiver(irec), &
- hxir,hetar,hgammar, &
- dxd,dyd,dzd,vxd,vyd,vzd,axd,ayd,azd)
-
- ! stores elements displacement field
- do k = 1,NGLLZ
- do j = 1,NGLLY
- do i = 1,NGLLX
- iglob = ibool(i,j,k,ispec)
- displ_element(:,i,j,k) = displ(:,iglob)
- enddo
- enddo
- enddo
-
- ! gets derivatives of local receiver interpolators
- hpxir(:) = hpxir_store(irec_local,:)
- hpetar(:) = hpetar_store(irec_local,:)
- hpgammar(:) = hpgammar_store(irec_local,:)
-
- ! computes the integrated derivatives of source parameters (M_jk and X_s)
- call compute_adj_source_frechet(displ_element,Mxx(irec),Myy(irec),Mzz(irec),&
- Mxy(irec),Mxz(irec),Myz(irec),eps_s,eps_m_s, &
- hxir,hetar,hgammar,hpxir,hpetar,hpgammar, &
- hprime_xx,hprime_yy,hprime_zz, &
- xix(:,:,:,ispec),xiy(:,:,:,ispec),xiz(:,:,:,ispec), &
- etax(:,:,:,ispec),etay(:,:,:,ispec),etaz(:,:,:,ispec), &
- gammax(:,:,:,ispec),gammay(:,:,:,ispec),gammaz(:,:,:,ispec))
-
- stf = comp_source_time_function(dble(NSTEP-it)*DT-t0-tshift_cmt(irec),hdur_gaussian(irec))
- stf_deltat = stf * deltat
- Mxx_der(irec_local) = Mxx_der(irec_local) + eps_s(1,1) * stf_deltat
- Myy_der(irec_local) = Myy_der(irec_local) + eps_s(2,2) * stf_deltat
- Mzz_der(irec_local) = Mzz_der(irec_local) + eps_s(3,3) * stf_deltat
- Mxy_der(irec_local) = Mxy_der(irec_local) + 2 * eps_s(1,2) * stf_deltat
- Mxz_der(irec_local) = Mxz_der(irec_local) + 2 * eps_s(1,3) * stf_deltat
- Myz_der(irec_local) = Myz_der(irec_local) + 2 * eps_s(2,3) * stf_deltat
-
- sloc_der(:,irec_local) = sloc_der(:,irec_local) + eps_m_s(:) * stf_deltat
- endif ! elastic
-
- ! acoustic wave field
- if( ispec_is_acoustic(ispec) ) then
- ! displacement vector
- call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
- potential_acoustic, displ_element,&
- hprime_xx,hprime_yy,hprime_zz, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- ibool,rhostore)
- ! velocity vector
- call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
- potential_dot_acoustic, veloc_element,&
- hprime_xx,hprime_yy,hprime_zz, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- ibool,rhostore)
-
- ! interpolates displ/veloc/pressure at receiver locations
- call compute_interpolated_dva_ac(displ_element,veloc_element,&
- potential_dot_dot_acoustic,potential_dot_acoustic,&
- potential_acoustic,NGLOB_AB, &
- ispec,NSPEC_AB,ibool, &
- xi_receiver(irec),eta_receiver(irec),gamma_receiver(irec), &
- hxir,hetar,hgammar, &
- dxd,dyd,dzd,vxd,vyd,vzd,axd,ayd,azd)
- endif ! acoustic
-
- !adjoint simulations
- else if (SIMULATION_TYPE == 3) then
-
- ispec = ispec_selected_rec(irec)
-
- ! elastic wave field
- if( ispec_is_elastic(ispec) ) then
- ! backward fields: interpolates displ/veloc/accel at receiver locations
- call compute_interpolated_dva(b_displ,b_veloc,b_accel,NGLOB_ADJOINT,&
- ispec,NSPEC_AB,ibool, &
- xi_receiver(irec),eta_receiver(irec),gamma_receiver(irec), &
- hxir,hetar,hgammar, &
- dxd,dyd,dzd,vxd,vyd,vzd,axd,ayd,azd)
- endif ! elastic
-
- ! acoustic wave field
- if( ispec_is_acoustic(ispec) ) then
- ! backward fields: displacement vector
- call compute_gradient(ispec,NSPEC_AB,NGLOB_ADJOINT, &
- b_potential_acoustic, displ_element,&
- hprime_xx,hprime_yy,hprime_zz, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- ibool,rhostore)
- ! backward fields: velocity vector
- call compute_gradient(ispec,NSPEC_AB,NGLOB_ADJOINT, &
- b_potential_dot_acoustic, veloc_element,&
- hprime_xx,hprime_yy,hprime_zz, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- ibool,rhostore)
-
- ! backward fields: interpolates displ/veloc/pressure at receiver locations
- call compute_interpolated_dva_ac(displ_element,veloc_element,&
- b_potential_dot_dot_acoustic,b_potential_dot_acoustic,&
- b_potential_acoustic,NGLOB_ADJOINT, &
- ispec,NSPEC_AB,ibool, &
- xi_receiver(irec),eta_receiver(irec),gamma_receiver(irec), &
- hxir,hetar,hgammar, &
- dxd,dyd,dzd,vxd,vyd,vzd,axd,ayd,azd)
- endif ! acoustic
-
- endif ! SIMULATION_TYPE
-
-! store North, East and Vertical components
-! distinguish between single and double precision for reals
- if(CUSTOM_REAL == SIZE_REAL) then
- seismograms_d(:,irec_local,it) = sngl((nu(:,1,irec)*dxd + nu(:,2,irec)*dyd + nu(:,3,irec)*dzd))
- seismograms_v(:,irec_local,it) = sngl((nu(:,1,irec)*vxd + nu(:,2,irec)*vyd + nu(:,3,irec)*vzd))
- seismograms_a(:,irec_local,it) = sngl((nu(:,1,irec)*axd + nu(:,2,irec)*ayd + nu(:,3,irec)*azd))
- else
- seismograms_d(:,irec_local,it) = (nu(:,1,irec)*dxd + nu(:,2,irec)*dyd + nu(:,3,irec)*dzd)
- seismograms_v(:,irec_local,it) = (nu(:,1,irec)*vxd + nu(:,2,irec)*vyd + nu(:,3,irec)*vzd)
- seismograms_a(:,irec_local,it) = (nu(:,1,irec)*axd + nu(:,2,irec)*ayd + nu(:,3,irec)*azd)
- endif
-
- !adjoint simulations
- if (SIMULATION_TYPE == 2) seismograms_eps(:,:,irec_local,it) = eps_s(:,:)
-
- enddo ! nrec_local
-
-! write the current or final seismograms
- if((mod(it,NTSTEP_BETWEEN_OUTPUT_SEISMOS) == 0 .or. it == NSTEP) .and. (.not.SU_FORMAT)) then
- if (SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) then
- call write_seismograms_to_file(myrank,seismograms_d,number_receiver_global,station_name, &
- network_name,nrec,nrec_local,it,DT,NSTEP,t0,LOCAL_PATH,1,SIMULATION_TYPE)
- call write_seismograms_to_file(myrank,seismograms_v,number_receiver_global,station_name, &
- network_name,nrec,nrec_local,it,DT,NSTEP,t0,LOCAL_PATH,2,SIMULATION_TYPE)
- call write_seismograms_to_file(myrank,seismograms_a,number_receiver_global,station_name, &
- network_name,nrec,nrec_local,it,DT,NSTEP,t0,LOCAL_PATH,3,SIMULATION_TYPE)
- else
- call write_adj_seismograms_to_file(myrank,seismograms_d,number_receiver_global, &
- nrec_local,it,DT,NSTEP,t0,LOCAL_PATH,1)
- endif
- endif
-
-! write ONE binary file for all receivers (nrec_local) within one proc
-! SU format, with 240-byte-header for each trace
- if ((mod(it,NTSTEP_BETWEEN_OUTPUT_SEISMOS) == 0 .or. it==NSTEP) .and. SU_FORMAT) &
- call write_seismograms_su()
-
- end subroutine write_seismograms
-
-
-!================================================================
-
-
-! write seismograms to text files
-
- subroutine write_seismograms_to_file(myrank,seismograms,number_receiver_global, &
- station_name,network_name,nrec,nrec_local, &
- it,DT,NSTEP,t0,LOCAL_PATH,istore,SIMULATION_TYPE)
-
- implicit none
-
- include "constants.h"
-
- integer :: NSTEP,it
- integer :: nrec,nrec_local
- integer :: myrank,istore
- integer :: SIMULATION_TYPE
-
- integer, dimension(nrec_local) :: number_receiver_global
- real(kind=CUSTOM_REAL), dimension(NDIM,nrec_local,NSTEP) :: seismograms
-
- double precision t0,DT
-
- character(len=256) LOCAL_PATH
- character(len=MAX_LENGTH_STATION_NAME), dimension(nrec) :: station_name
- character(len=MAX_LENGTH_NETWORK_NAME), dimension(nrec) :: network_name
-
- ! local parameters
- integer irec,irec_local
- integer irecord
-
- character(len=1) component
-
- ! parameters for master collects seismograms
- real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: one_seismogram
- integer :: nrec_local_received,NPROCTOT,total_seismos,receiver,sender
- integer :: iproc,ier
-
- ! saves displacement, velocity or acceleration
- if(istore == 1) then
- component = 'd'
- else if(istore == 2) then
- component = 'v'
- else if(istore == 3) then
- component = 'a'
- else
- call exit_MPI(myrank,'wrong component to save for seismograms')
- endif
-
- allocate(one_seismogram(NDIM,NSTEP),stat=ier)
- if(ier /= 0) stop 'error while allocating one temporary seismogram'
-
- ! all processes write their local seismograms themselves
- if( .not. WRITE_SEISMOGRAMS_BY_MASTER ) then
-
- ! loop on all the local receivers
- do irec_local = 1,nrec_local
-
- ! get global number of that receiver
- irec = number_receiver_global(irec_local)
-
- ! save three components of displacement vector
- irecord = 1
-
- ! writes out this seismogram
- one_seismogram = seismograms(:,irec_local,:)
-
- call write_one_seismogram(one_seismogram,irec, &
- station_name,network_name,nrec, &
- DT,t0,it,NSTEP,SIMULATION_TYPE, &
- myrank,irecord,component,LOCAL_PATH)
-
- enddo ! nrec_local
-
-! now only the master process does the writing of seismograms and
-! collects the data from all other processes
- else ! WRITE_SEISMOGRAMS_BY_MASTER
-
- if(myrank == 0) then ! on the master, gather all the seismograms
-
- total_seismos = 0
-
- ! loop on all the slices
- call world_size(NPROCTOT)
- do iproc = 0,NPROCTOT-1
-
- ! receive except from proc 0, which is me and therefore I already have this value
- sender = iproc
- if(iproc /= 0) then
- call recv_i(nrec_local_received,1,sender,itag)
- if(nrec_local_received < 0) call exit_MPI(myrank,'error while receiving local number of receivers')
- else
- nrec_local_received = nrec_local
- endif
-
- if (nrec_local_received > 0) then
- do irec_local = 1,nrec_local_received
- ! receive except from proc 0, which is myself and therefore I already have these values
- if(iproc == 0) then
- ! get global number of that receiver
- irec = number_receiver_global(irec_local)
- one_seismogram(:,:) = seismograms(:,irec_local,:)
- else
- call recv_i(irec,1,sender,itag)
- if(irec < 1 .or. irec > nrec) call exit_MPI(myrank,'error while receiving global receiver number')
-
- call recvv_cr(one_seismogram,NDIM*NSTEP,sender,itag)
- endif
-
- total_seismos = total_seismos + 1
-
- ! save three components of displacement vector
- irecord = 1
-
- ! writes out this seismogram
- call write_one_seismogram(one_seismogram,irec, &
- station_name,network_name,nrec, &
- DT,t0,it,NSTEP,SIMULATION_TYPE, &
- myrank,irecord,component,LOCAL_PATH)
-
- enddo ! nrec_local_received
- endif ! if(nrec_local_received > 0 )
- enddo ! NPROCTOT-1
-
- write(IMAIN,*) 'Component: .sem'//component
- write(IMAIN,*) ' total number of receivers saved is ',total_seismos,' out of ',nrec
- write(IMAIN,*)
-
- if(total_seismos /= nrec) call exit_MPI(myrank,'incorrect total number of receivers saved')
-
- else ! on the nodes, send the seismograms to the master
- receiver = 0
- call send_i(nrec_local,1,receiver,itag)
- if (nrec_local > 0) then
- do irec_local = 1,nrec_local
- ! get global number of that receiver
- irec = number_receiver_global(irec_local)
- call send_i(irec,1,receiver,itag)
-
- ! sends seismogram of that receiver
- one_seismogram(:,:) = seismograms(:,irec_local,:)
- call sendv_cr(one_seismogram,NDIM*NSTEP,receiver,itag)
- enddo
- endif
- endif ! myrank
-
- endif ! WRITE_SEISMOGRAMS_BY_MASTER
-
- deallocate(one_seismogram)
-
- end subroutine write_seismograms_to_file
-
-!=====================================================================
-
- subroutine write_one_seismogram(one_seismogram,irec, &
- station_name,network_name,nrec, &
- DT,t0,it,NSTEP,SIMULATION_TYPE, &
- myrank,irecord,component,LOCAL_PATH)
-
- implicit none
-
- include "constants.h"
-
- integer :: NSTEP,it,SIMULATION_TYPE
- real(kind=CUSTOM_REAL), dimension(NDIM,NSTEP) :: one_seismogram
-
- integer myrank,irecord
- double precision t0,DT
-
- integer :: nrec,irec
- character(len=MAX_LENGTH_STATION_NAME), dimension(nrec) :: station_name
- character(len=MAX_LENGTH_NETWORK_NAME), dimension(nrec) :: network_name
- character(len=1) component
- character(len=256) LOCAL_PATH
-
- ! local parameters
- integer iorientation
- integer length_station_name,length_network_name
- character(len=256) sisname,clean_LOCAL_PATH,final_LOCAL_PATH
- character(len=3) channel
-
- ! loops over each seismogram component
- do iorientation = 1,NDIM
-
- ! gets channel name
- call write_channel_name(iorientation,channel)
-
- ! create the name of the seismogram file for each slice
- ! file name includes the name of the station, the network and the component
- length_station_name = len_trim(station_name(irec))
- length_network_name = len_trim(network_name(irec))
-
- ! check that length conforms to standard
- if(length_station_name < 1 .or. length_station_name > MAX_LENGTH_STATION_NAME) &
- call exit_MPI(myrank,'wrong length of station name')
-
- if(length_network_name < 1 .or. length_network_name > MAX_LENGTH_NETWORK_NAME) &
- call exit_MPI(myrank,'wrong length of network name')
-
- write(sisname,"(a,'.',a,'.',a3,'.sem',a1)") station_name(irec)(1:length_station_name),&
- network_name(irec)(1:length_network_name),channel,component
-
- ! directory to store seismograms
- if( USE_OUTPUT_FILES_PATH ) then
- final_LOCAL_PATH = OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH)) // '/'
- else
- ! suppress white spaces if any
- clean_LOCAL_PATH = adjustl(LOCAL_PATH)
- ! create full final local path
- final_LOCAL_PATH = clean_LOCAL_PATH(1:len_trim(clean_LOCAL_PATH)) // '/'
- endif
-
- ! ASCII output format
- call write_output_ASCII(one_seismogram, &
- NSTEP,it,SIMULATION_TYPE,DT,t0,myrank, &
- iorientation,irecord,sisname,final_LOCAL_PATH)
-
- enddo ! do iorientation
-
- end subroutine write_one_seismogram
-
-!=====================================================================
-
-! write adjoint seismograms (displacement) to text files
-
- subroutine write_adj_seismograms_to_file(myrank,seismograms,number_receiver_global, &
- nrec_local,it,DT,NSTEP,t0,LOCAL_PATH,istore)
-
- implicit none
-
- include "constants.h"
-
- integer nrec_local,NSTEP,it,myrank,istore
- integer, dimension(nrec_local) :: number_receiver_global
- real(kind=CUSTOM_REAL), dimension(NDIM,nrec_local,NSTEP) :: seismograms
- double precision t0,DT
- character(len=256) LOCAL_PATH
-
-
- integer irec,irec_local
- integer iorientation,irecord,isample
-
- character(len=3) channel
- character(len=1) component
- character(len=256) sisname,clean_LOCAL_PATH,final_LOCAL_PATH
-
-! save displacement, velocity or acceleration
- if(istore == 1) then
- component = 'd'
- else if(istore == 2) then
- component = 'v'
- else if(istore == 3) then
- component = 'a'
- else
- call exit_MPI(myrank,'wrong component to save for seismograms')
- endif
-
- do irec_local = 1,nrec_local
-
- ! get global number of that receiver
- irec = number_receiver_global(irec_local)
-
- ! save three components of displacement vector
- irecord = 1
-
- do iorientation = 1,NDIM
-
- ! gets channel name
- call write_channel_name(iorientation,channel)
-
- ! create the name of the seismogram file for each slice
- ! file name includes the name of the station, the network and the component
- write(sisname,"(a,i5.5,'.',a,'.',a3,'.sem',a1)") 'S',irec_local,&
- 'NT',channel,component
-
- ! directory to store seismograms
- if( USE_OUTPUT_FILES_PATH ) then
- final_LOCAL_PATH = OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH)) // '/'
- else
- ! suppress white spaces if any
- clean_LOCAL_PATH = adjustl(LOCAL_PATH)
- ! create full final local path
- final_LOCAL_PATH = clean_LOCAL_PATH(1:len_trim(clean_LOCAL_PATH)) // '/'
- endif
-
-
- ! save seismograms in text format with no subsampling.
- ! Because we do not subsample the output, this can result in large files
- ! if the simulation uses many time steps. However, subsampling the output
- ! here would result in a loss of accuracy when one later convolves
- ! the results with the source time function
- open(unit=IOUT,file=final_LOCAL_PATH(1:len_trim(final_LOCAL_PATH))//sisname(1:len_trim(sisname)),status='unknown')
-
- ! make sure we never write more than the maximum number of time steps
- ! subtract half duration of the source to make sure travel time is correct
- do isample = 1,min(it,NSTEP)
- if(irecord == 1) then
- ! distinguish between single and double precision for reals
- if(CUSTOM_REAL == SIZE_REAL) then
- write(IOUT,*) sngl(dble(isample-1)*DT - t0),' ',seismograms(iorientation,irec_local,isample)
- else
- write(IOUT,*) dble(isample-1)*DT - t0,' ',seismograms(iorientation,irec_local,isample)
- endif
- else
- call exit_MPI(myrank,'incorrect record label')
- endif
- enddo
-
- close(IOUT)
-
- enddo
-
- enddo
-
- end subroutine write_adj_seismograms_to_file
-
-!=====================================================================
-
-! write adjoint seismograms (strain) to text files
-
- subroutine write_adj_seismograms2_to_file(myrank,seismograms,number_receiver_global, &
- nrec_local,it,DT,NSTEP,t0,LOCAL_PATH)
-
- implicit none
-
- include "constants.h"
-
- integer nrec_local,NSTEP,it,myrank
- integer, dimension(nrec_local) :: number_receiver_global
- real(kind=CUSTOM_REAL), dimension(NDIM,NDIM,nrec_local,NSTEP) :: seismograms
- double precision t0,DT
- character(len=256) LOCAL_PATH
-
-
- integer irec,irec_local
- integer idim,jdim,irecord,isample
-
- character(len=4) chn
- character(len=1) component
- character(len=256) sisname,clean_LOCAL_PATH,final_LOCAL_PATH
-
- do irec_local = 1,nrec_local
-
- ! get global number of that receiver
- irec = number_receiver_global(irec_local)
-
- ! save three components of displacement vector
- irecord = 1
-
- do idim = 1, 3
- do jdim = idim, 3
-
- if(idim == 1 .and. jdim == 1) then
- chn = 'SNN'
- else if(idim == 1 .and. jdim == 2) then
- chn = 'SEN'
- else if(idim == 1 .and. jdim == 3) then
- chn = 'SEZ'
- else if(idim == 2 .and. jdim == 2) then
- chn = 'SEE'
- else if(idim == 2 .and. jdim == 3) then
- chn = 'SNZ'
- else if(idim == 3 .and. jdim == 3) then
- chn = 'SZZ'
- else
- call exit_MPI(myrank,'incorrect channel value')
- endif
-
- ! create the name of the seismogram file for each slice
- ! file name includes the name of the station, the network and the component
- write(sisname,"(a,i5.5,'.',a,'.',a3,'.sem',a1)") 'S',irec_local,&
- 'NT',chn,component
-
- ! directory to store seismograms
- if( USE_OUTPUT_FILES_PATH ) then
- final_LOCAL_PATH = OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH)) // '/'
- else
- ! suppress white spaces if any
- clean_LOCAL_PATH = adjustl(LOCAL_PATH)
- ! create full final local path
- final_LOCAL_PATH = clean_LOCAL_PATH(1:len_trim(clean_LOCAL_PATH)) // '/'
- endif
-
- ! save seismograms in text format with no subsampling.
- ! Because we do not subsample the output, this can result in large files
- ! if the simulation uses many time steps. However, subsampling the output
- ! here would result in a loss of accuracy when one later convolves
- ! the results with the source time function
- open(unit=IOUT,file=final_LOCAL_PATH(1:len_trim(final_LOCAL_PATH))//sisname(1:len_trim(sisname)),status='unknown')
-
- ! make sure we never write more than the maximum number of time steps
- ! subtract half duration of the source to make sure travel time is correct
- do isample = 1,min(it,NSTEP)
- if(irecord == 1) then
- ! distinguish between single and double precision for reals
- if(CUSTOM_REAL == SIZE_REAL) then
- write(IOUT,*) sngl(dble(isample-1)*DT - t0),' ',seismograms(jdim,idim,irec_local,isample)
- else
- write(IOUT,*) dble(isample-1)*DT - t0,' ',seismograms(jdim,idim,irec_local,isample)
- endif
- else
- call exit_MPI(myrank,'incorrect record label')
- endif
- enddo
-
- close(IOUT)
-
- enddo ! jdim
- enddo ! idim
- enddo ! irec_local
-
-end subroutine write_adj_seismograms2_to_file
-
-!=====================================================================
-
-subroutine write_channel_name(iorientation,channel)
-
- use specfem_par,only: DT,SUPPRESS_UTM_PROJECTION
- implicit none
-
- integer :: iorientation
- character(len=3) :: channel
-
- ! local parameters
- character(len=2) :: bic
- double precision:: sampling_rate
-
- ! gets band and instrument code
- sampling_rate = DT
- call band_instrument_code(sampling_rate,bic)
-
- ! sets channel name
- if( SUPPRESS_UTM_PROJECTION ) then
-
- ! no UTM, pure Cartesian reference
- ! uses Cartesian X/Y/Z direction to denote channel
- select case(iorientation)
- case(1)
- channel = bic(1:2)//'X'
- case(2)
- channel = bic(1:2)//'Y'
- case(3)
- channel = bic(1:2)//'Z'
- case default
- call exit_mpi(0,'error channel orientation value')
- end select
-
- else
-
- ! UTM conversion
- ! uses convention for N/E/Z to denote channel
- select case(iorientation)
- case(1)
- channel = bic(1:2)//'E'
- case(2)
- channel = bic(1:2)//'N'
- case(3)
- channel = bic(1:2)//'Z'
- case default
- call exit_mpi(0,'error channel orientation value')
- end select
-
- endif
-
-end subroutine write_channel_name
-
-!=====================================================================
-
-subroutine band_instrument_code(DT,bic)
- ! This subroutine is to choose the appropriate band and instrument codes for channel names of seismograms
- ! based on the IRIS convention (first two letters of channel codes, respectively,
- ! which were LH(Z/E/N) previously).
- ! For consistency with observed data, we now use the IRIS convention for band codes (first letter in channel codes) of
- ! SEM seismograms governed by their sampling rate.
- ! Instrument code (second letter in channel codes) is fixed to "X" which is assigned by IRIS for synthetic seismograms.
- ! See the manual for further explanations!
- ! Ebru, November 2010
- implicit none
- double precision :: DT
- character(len=2) :: bic
- ! local parameter
- logical,parameter :: SUPPRESS_IRIS_CONVENTION = .false.
-
- ! see manual for ranges
- if (DT .ge. 1.0d0) bic = 'LX'
- if (DT .lt. 1.0d0 .and. DT .gt. 0.1d0) bic = 'MX'
- if (DT .le. 0.1d0 .and. DT .gt. 0.0125d0) bic = 'BX'
- if (DT .le. 0.0125d0 .and. DT .gt. 0.004d0) bic = 'HX'
- if (DT .le. 0.004d0 .and. DT .gt. 0.001d0) bic = 'CX'
- if (DT .le. 0.001d0) bic = 'FX'
-
- ! ignores IRIS convention, uses previous, constant band and instrument code
- if( SUPPRESS_IRIS_CONVENTION ) then
- bic = 'BH'
- endif
-
- end subroutine band_instrument_code
-
-!=====================================================================
-
- subroutine write_seismograms_su()
-
- use specfem_par
- use specfem_par_acoustic
- use specfem_par_elastic
- use specfem_par_poroelastic
-
- implicit none
-
- character(len=256) procname,final_LOCAL_PATH
- integer :: irec_local,irec,ios
- real :: x_station,y_station
-
- ! headers
- integer,parameter :: nheader=240 ! 240 bytes
- integer(kind=2) :: i2head(nheader/2) ! 2-byte-integer
- integer(kind=4) :: i4head(nheader/4) ! 4-byte-integer
- real(kind=4) :: r4head(nheader/4) ! 4-byte-real
- equivalence (i2head,i4head,r4head) ! share the same 240-byte-memory
-
- double precision, allocatable, dimension(:) :: stlat,stlon,stele,stbur,stutm_x,stutm_y,elevation
- double precision, allocatable, dimension(:) :: x_found,y_found,z_found
- double precision :: x_found_source,y_found_source,z_found_source
-
- allocate(x_found(nrec))
- allocate(y_found(nrec))
- allocate(z_found(nrec))
- open(unit=IIN_SU1,file=trim(OUTPUT_FILES)//'/output_list_stations.txt',status='unknown')
- do irec=1,nrec
- read(IIN_SU1,*) x_found(irec),y_found(irec),z_found(irec)
- enddo
- close(IIN_SU1)
- open(unit=IIN_SU1,file=trim(OUTPUT_FILES)//'/output_list_sources.txt',status='unknown')
- read(IIN_SU1,*) x_found_source,y_found_source,z_found_source
- close(IIN_SU1)
- ! directory to store seismograms
- if( USE_OUTPUT_FILES_PATH ) then
- final_LOCAL_PATH = OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH)) // '/'
- else
- ! create full final local path
- final_LOCAL_PATH = trim(adjustl(LOCAL_PATH)) // '/'
- endif
- write(procname,"(i4)") myrank
-
- ! write seismograms (dx)
- open(unit=IOUT_SU, file=trim(adjustl(final_LOCAL_PATH))//trim(adjustl(procname))//'_dx_SU' ,&
- form='unformatted', access='direct', recl=240+4*(NSTEP))
- do irec_local = 1,nrec_local
- irec = number_receiver_global(irec_local)
- i4head(1) =irec
- i4head(11) =z_found(irec)
- i4head(13) =z_found_source
- i4head(19) =x_found_source !utm_x_source(1)
- i4head(20) =y_found_source !utm_y_source(1)
- i4head(21) =x_found(irec) !stutm_x(irec)
- i4head(22) =y_found(irec) !stutm_y(irec)
- i2head(58) =NSTEP
- i2head(59) =DT*1.0d6
- write(IOUT_SU,rec=irec_local) r4head, seismograms_d(1,irec_local,:)
- enddo
- close(IOUT_SU)
- ! write seismograms (dy)
- open(unit=IOUT_SU, file=trim(adjustl(final_LOCAL_PATH))//trim(adjustl(procname))//'_dy_SU' ,&
- form='unformatted', access='direct', recl=240+4*(NSTEP))
- do irec_local = 1,nrec_local
- irec = number_receiver_global(irec_local)
- i4head(1) =irec
- i4head(11) =z_found(irec)
- i4head(13) =z_found_source
- i4head(19) =x_found_source !utm_x_source(1)
- i4head(20) =y_found_source !utm_y_source(1)
- i4head(21) =x_found(irec) !stutm_x(irec)
- i4head(22) =y_found(irec) !stutm_y(irec)
- i2head(58) =NSTEP
- i2head(59) =DT*1.0d6
- write(IOUT_SU,rec=irec_local) r4head, seismograms_d(2,irec_local,:)
- enddo
- close(IOUT_SU)
-
- ! write seismograms (dz)
- open(unit=IOUT_SU, file=trim(adjustl(final_LOCAL_PATH))//trim(adjustl(procname))//'_dz_SU' ,&
- form='unformatted', access='direct', recl=240+4*(NSTEP))
- do irec_local = 1,nrec_local
- irec = number_receiver_global(irec_local)
- i4head(1) =irec
- i4head(11) =z_found(irec)
- i4head(13) =z_found_source
- i4head(19) =x_found_source !utm_x_source(1)
- i4head(20) =y_found_source !utm_y_source(1)
- i4head(21) =x_found(irec) !stutm_x(irec)
- i4head(22) =y_found(irec) !stutm_y(irec)
- i2head(58) =NSTEP
- i2head(59) =DT*1.0d6
- write(IOUT_SU,rec=irec_local) r4head, seismograms_d(3,irec_local,:)
- enddo
- close(IOUT_SU)
-
- end subroutine write_seismograms_su
-
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+
+ subroutine write_seismograms()
+
+! writes the seismograms with time shift
+
+ use specfem_par
+ use specfem_par_acoustic
+ use specfem_par_elastic
+ use specfem_par_poroelastic
+ implicit none
+ ! local parameters
+ real(kind=CUSTOM_REAL),dimension(NDIM,NGLLX,NGLLY,NGLLZ):: displ_element,veloc_element
+ double precision :: dxd,dyd,dzd,vxd,vyd,vzd,axd,ayd,azd
+ integer :: irec_local,irec
+ integer :: iglob,ispec,i,j,k
+ ! adjoint locals
+ real(kind=CUSTOM_REAL),dimension(NDIM,NDIM):: eps_s
+ real(kind=CUSTOM_REAL),dimension(NDIM):: eps_m_s
+ real(kind=CUSTOM_REAL):: stf_deltat
+ double precision :: stf
+
+ ! gets resulting array values onto CPU
+ if(GPU_MODE) then
+ ! this transfers fields only in elements with stations for efficiency
+ if( ACOUSTIC_SIMULATION ) then
+ ! only copy corresponding elements to CPU host
+ ! timing: Elapsed time: 5.230904e-04
+ call transfer_station_fields_acoustic_from_device( &
+ potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
+ b_potential_acoustic,b_potential_dot_acoustic,b_potential_dot_dot_acoustic, &
+ Mesh_pointer,number_receiver_global, &
+ ispec_selected_rec,ispec_selected_source,ibool,SIMULATION_TYPE)
+
+ ! alternative: transfers whole fields
+ ! timing: Elapsed time: 4.138947e-03
+ !call transfer_fields_acoustic_from_device(NGLOB_AB,potential_acoustic, &
+ ! potential_dot_acoustic,potential_dot_dot_acoustic,Mesh_pointer)
+ endif
+
+ ! this transfers fields only in elements with stations for efficiency
+ if( ELASTIC_SIMULATION ) then
+ call transfer_station_fields_from_device( &
+ displ,veloc,accel, &
+ b_displ,b_veloc,b_accel, &
+ Mesh_pointer,number_receiver_global, &
+ ispec_selected_rec,ispec_selected_source, &
+ ibool,SIMULATION_TYPE)
+
+ ! alternative: transfers whole fields
+ ! call transfer_fields_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
+ endif
+ endif
+
+ do irec_local = 1,nrec_local
+
+ ! gets global number of that receiver
+ irec = number_receiver_global(irec_local)
+
+ ! gets local receiver interpolators
+ ! (1-D Lagrange interpolators)
+ hxir(:) = hxir_store(irec_local,:)
+ hetar(:) = hetar_store(irec_local,:)
+ hgammar(:) = hgammar_store(irec_local,:)
+
+ ! forward simulations
+ if (SIMULATION_TYPE == 1) then
+
+ ! receiver's spectral element
+ ispec = ispec_selected_rec(irec)
+
+ ! elastic wave field
+ if( ispec_is_elastic(ispec) ) then
+ ! interpolates displ/veloc/accel at receiver locations
+ call compute_interpolated_dva(displ,veloc,accel,NGLOB_AB, &
+ ispec,NSPEC_AB,ibool, &
+ xi_receiver(irec),eta_receiver(irec),gamma_receiver(irec), &
+ hxir,hetar,hgammar, &
+ dxd,dyd,dzd,vxd,vyd,vzd,axd,ayd,azd)
+ endif !elastic
+
+ ! acoustic wave field
+ if( ispec_is_acoustic(ispec) ) then
+ ! displacement vector
+ call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
+ potential_acoustic, displ_element,&
+ hprime_xx,hprime_yy,hprime_zz, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ ibool,rhostore)
+ ! velocity vector
+ call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
+ potential_dot_acoustic, veloc_element,&
+ hprime_xx,hprime_yy,hprime_zz, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ ibool,rhostore)
+
+ ! interpolates displ/veloc/pressure at receiver locations
+ call compute_interpolated_dva_ac(displ_element,veloc_element,&
+ potential_dot_dot_acoustic,potential_dot_acoustic,&
+ potential_acoustic,NGLOB_AB, &
+ ispec,NSPEC_AB,ibool, &
+ xi_receiver(irec),eta_receiver(irec),gamma_receiver(irec), &
+ hxir,hetar,hgammar, &
+ dxd,dyd,dzd,vxd,vyd,vzd,axd,ayd,azd)
+ endif ! acoustic
+
+ !adjoint simulations
+ else if (SIMULATION_TYPE == 2) then
+
+ ! adjoint source is placed at receiver
+ ispec = ispec_selected_source(irec)
+
+ ! elastic wave field
+ if( ispec_is_elastic(ispec) ) then
+ ! interpolates displ/veloc/accel at receiver locations
+ call compute_interpolated_dva(displ,veloc,accel,NGLOB_AB, &
+ ispec,NSPEC_AB,ibool, &
+ xi_receiver(irec),eta_receiver(irec),gamma_receiver(irec), &
+ hxir,hetar,hgammar, &
+ dxd,dyd,dzd,vxd,vyd,vzd,axd,ayd,azd)
+
+ ! stores elements displacement field
+ do k = 1,NGLLZ
+ do j = 1,NGLLY
+ do i = 1,NGLLX
+ iglob = ibool(i,j,k,ispec)
+ displ_element(:,i,j,k) = displ(:,iglob)
+ enddo
+ enddo
+ enddo
+
+ ! gets derivatives of local receiver interpolators
+ hpxir(:) = hpxir_store(irec_local,:)
+ hpetar(:) = hpetar_store(irec_local,:)
+ hpgammar(:) = hpgammar_store(irec_local,:)
+
+ ! computes the integrated derivatives of source parameters (M_jk and X_s)
+ call compute_adj_source_frechet(displ_element,Mxx(irec),Myy(irec),Mzz(irec),&
+ Mxy(irec),Mxz(irec),Myz(irec),eps_s,eps_m_s, &
+ hxir,hetar,hgammar,hpxir,hpetar,hpgammar, &
+ hprime_xx,hprime_yy,hprime_zz, &
+ xix(:,:,:,ispec),xiy(:,:,:,ispec),xiz(:,:,:,ispec), &
+ etax(:,:,:,ispec),etay(:,:,:,ispec),etaz(:,:,:,ispec), &
+ gammax(:,:,:,ispec),gammay(:,:,:,ispec),gammaz(:,:,:,ispec))
+
+ stf = comp_source_time_function(dble(NSTEP-it)*DT-t0-tshift_cmt(irec),hdur_gaussian(irec))
+ stf_deltat = stf * deltat
+ Mxx_der(irec_local) = Mxx_der(irec_local) + eps_s(1,1) * stf_deltat
+ Myy_der(irec_local) = Myy_der(irec_local) + eps_s(2,2) * stf_deltat
+ Mzz_der(irec_local) = Mzz_der(irec_local) + eps_s(3,3) * stf_deltat
+ Mxy_der(irec_local) = Mxy_der(irec_local) + 2 * eps_s(1,2) * stf_deltat
+ Mxz_der(irec_local) = Mxz_der(irec_local) + 2 * eps_s(1,3) * stf_deltat
+ Myz_der(irec_local) = Myz_der(irec_local) + 2 * eps_s(2,3) * stf_deltat
+
+ sloc_der(:,irec_local) = sloc_der(:,irec_local) + eps_m_s(:) * stf_deltat
+ endif ! elastic
+
+ ! acoustic wave field
+ if( ispec_is_acoustic(ispec) ) then
+ ! displacement vector
+ call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
+ potential_acoustic, displ_element,&
+ hprime_xx,hprime_yy,hprime_zz, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ ibool,rhostore)
+ ! velocity vector
+ call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
+ potential_dot_acoustic, veloc_element,&
+ hprime_xx,hprime_yy,hprime_zz, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ ibool,rhostore)
+
+ ! interpolates displ/veloc/pressure at receiver locations
+ call compute_interpolated_dva_ac(displ_element,veloc_element,&
+ potential_dot_dot_acoustic,potential_dot_acoustic,&
+ potential_acoustic,NGLOB_AB, &
+ ispec,NSPEC_AB,ibool, &
+ xi_receiver(irec),eta_receiver(irec),gamma_receiver(irec), &
+ hxir,hetar,hgammar, &
+ dxd,dyd,dzd,vxd,vyd,vzd,axd,ayd,azd)
+ endif ! acoustic
+
+ !adjoint simulations
+ else if (SIMULATION_TYPE == 3) then
+
+ ispec = ispec_selected_rec(irec)
+
+ ! elastic wave field
+ if( ispec_is_elastic(ispec) ) then
+ ! backward fields: interpolates displ/veloc/accel at receiver locations
+ call compute_interpolated_dva(b_displ,b_veloc,b_accel,NGLOB_ADJOINT,&
+ ispec,NSPEC_AB,ibool, &
+ xi_receiver(irec),eta_receiver(irec),gamma_receiver(irec), &
+ hxir,hetar,hgammar, &
+ dxd,dyd,dzd,vxd,vyd,vzd,axd,ayd,azd)
+ endif ! elastic
+
+ ! acoustic wave field
+ if( ispec_is_acoustic(ispec) ) then
+ ! backward fields: displacement vector
+ call compute_gradient(ispec,NSPEC_AB,NGLOB_ADJOINT, &
+ b_potential_acoustic, displ_element,&
+ hprime_xx,hprime_yy,hprime_zz, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ ibool,rhostore)
+ ! backward fields: velocity vector
+ call compute_gradient(ispec,NSPEC_AB,NGLOB_ADJOINT, &
+ b_potential_dot_acoustic, veloc_element,&
+ hprime_xx,hprime_yy,hprime_zz, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ ibool,rhostore)
+
+ ! backward fields: interpolates displ/veloc/pressure at receiver locations
+ call compute_interpolated_dva_ac(displ_element,veloc_element,&
+ b_potential_dot_dot_acoustic,b_potential_dot_acoustic,&
+ b_potential_acoustic,NGLOB_ADJOINT, &
+ ispec,NSPEC_AB,ibool, &
+ xi_receiver(irec),eta_receiver(irec),gamma_receiver(irec), &
+ hxir,hetar,hgammar, &
+ dxd,dyd,dzd,vxd,vyd,vzd,axd,ayd,azd)
+ endif ! acoustic
+
+ endif ! SIMULATION_TYPE
+
+! store North, East and Vertical components
+! distinguish between single and double precision for reals
+ if(CUSTOM_REAL == SIZE_REAL) then
+ seismograms_d(:,irec_local,it) = sngl((nu(:,1,irec)*dxd + nu(:,2,irec)*dyd + nu(:,3,irec)*dzd))
+ seismograms_v(:,irec_local,it) = sngl((nu(:,1,irec)*vxd + nu(:,2,irec)*vyd + nu(:,3,irec)*vzd))
+ seismograms_a(:,irec_local,it) = sngl((nu(:,1,irec)*axd + nu(:,2,irec)*ayd + nu(:,3,irec)*azd))
+ else
+ seismograms_d(:,irec_local,it) = (nu(:,1,irec)*dxd + nu(:,2,irec)*dyd + nu(:,3,irec)*dzd)
+ seismograms_v(:,irec_local,it) = (nu(:,1,irec)*vxd + nu(:,2,irec)*vyd + nu(:,3,irec)*vzd)
+ seismograms_a(:,irec_local,it) = (nu(:,1,irec)*axd + nu(:,2,irec)*ayd + nu(:,3,irec)*azd)
+ endif
+
+ !adjoint simulations
+ if (SIMULATION_TYPE == 2) seismograms_eps(:,:,irec_local,it) = eps_s(:,:)
+
+ enddo ! nrec_local
+
+! write the current or final seismograms
+ if((mod(it,NTSTEP_BETWEEN_OUTPUT_SEISMOS) == 0 .or. it == NSTEP) .and. (.not.SU_FORMAT)) then
+ if (SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) then
+ call write_seismograms_to_file(myrank,seismograms_d,number_receiver_global,station_name, &
+ network_name,nrec,nrec_local,it,DT,NSTEP,t0,LOCAL_PATH,1,SIMULATION_TYPE)
+ call write_seismograms_to_file(myrank,seismograms_v,number_receiver_global,station_name, &
+ network_name,nrec,nrec_local,it,DT,NSTEP,t0,LOCAL_PATH,2,SIMULATION_TYPE)
+ call write_seismograms_to_file(myrank,seismograms_a,number_receiver_global,station_name, &
+ network_name,nrec,nrec_local,it,DT,NSTEP,t0,LOCAL_PATH,3,SIMULATION_TYPE)
+ else
+ call write_adj_seismograms_to_file(myrank,seismograms_d,number_receiver_global, &
+ nrec_local,it,DT,NSTEP,t0,LOCAL_PATH,1)
+ endif
+ endif
+
+! write ONE binary file for all receivers (nrec_local) within one proc
+! SU format, with 240-byte-header for each trace
+ if ((mod(it,NTSTEP_BETWEEN_OUTPUT_SEISMOS) == 0 .or. it==NSTEP) .and. SU_FORMAT) &
+ call write_seismograms_su()
+
+ end subroutine write_seismograms
+
+
+!================================================================
+
+
+! write seismograms to text files
+
+ subroutine write_seismograms_to_file(myrank,seismograms,number_receiver_global, &
+ station_name,network_name,nrec,nrec_local, &
+ it,DT,NSTEP,t0,LOCAL_PATH,istore,SIMULATION_TYPE)
+
+ implicit none
+
+ include "constants.h"
+
+ integer :: NSTEP,it
+ integer :: nrec,nrec_local
+ integer :: myrank,istore
+ integer :: SIMULATION_TYPE
+
+ integer, dimension(nrec_local) :: number_receiver_global
+ real(kind=CUSTOM_REAL), dimension(NDIM,nrec_local,NSTEP) :: seismograms
+
+ double precision t0,DT
+
+ character(len=256) LOCAL_PATH
+ character(len=MAX_LENGTH_STATION_NAME), dimension(nrec) :: station_name
+ character(len=MAX_LENGTH_NETWORK_NAME), dimension(nrec) :: network_name
+
+ ! local parameters
+ integer irec,irec_local
+ integer irecord
+
+ character(len=1) component
+
+ ! parameters for master collects seismograms
+ real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: one_seismogram
+ integer :: nrec_local_received,NPROCTOT,total_seismos,receiver,sender
+ integer :: iproc,ier
+
+ ! saves displacement, velocity or acceleration
+ if(istore == 1) then
+ component = 'd'
+ else if(istore == 2) then
+ component = 'v'
+ else if(istore == 3) then
+ component = 'a'
+ else
+ call exit_MPI(myrank,'wrong component to save for seismograms')
+ endif
+
+ allocate(one_seismogram(NDIM,NSTEP),stat=ier)
+ if(ier /= 0) stop 'error while allocating one temporary seismogram'
+
+ ! all processes write their local seismograms themselves
+ if( .not. WRITE_SEISMOGRAMS_BY_MASTER ) then
+
+ ! loop on all the local receivers
+ do irec_local = 1,nrec_local
+
+ ! get global number of that receiver
+ irec = number_receiver_global(irec_local)
+
+ ! save three components of displacement vector
+ irecord = 1
+
+ ! writes out this seismogram
+ one_seismogram = seismograms(:,irec_local,:)
+
+ call write_one_seismogram(one_seismogram,irec, &
+ station_name,network_name,nrec, &
+ DT,t0,it,NSTEP,SIMULATION_TYPE, &
+ myrank,irecord,component,LOCAL_PATH)
+
+ enddo ! nrec_local
+
+! now only the master process does the writing of seismograms and
+! collects the data from all other processes
+ else ! WRITE_SEISMOGRAMS_BY_MASTER
+
+ if(myrank == 0) then ! on the master, gather all the seismograms
+
+ total_seismos = 0
+
+ ! loop on all the slices
+ call world_size(NPROCTOT)
+ do iproc = 0,NPROCTOT-1
+
+ ! receive except from proc 0, which is me and therefore I already have this value
+ sender = iproc
+ if(iproc /= 0) then
+ call recv_i(nrec_local_received,1,sender,itag)
+ if(nrec_local_received < 0) call exit_MPI(myrank,'error while receiving local number of receivers')
+ else
+ nrec_local_received = nrec_local
+ endif
+
+ if (nrec_local_received > 0) then
+ do irec_local = 1,nrec_local_received
+ ! receive except from proc 0, which is myself and therefore I already have these values
+ if(iproc == 0) then
+ ! get global number of that receiver
+ irec = number_receiver_global(irec_local)
+ one_seismogram(:,:) = seismograms(:,irec_local,:)
+ else
+ call recv_i(irec,1,sender,itag)
+ if(irec < 1 .or. irec > nrec) call exit_MPI(myrank,'error while receiving global receiver number')
+
+ call recvv_cr(one_seismogram,NDIM*NSTEP,sender,itag)
+ endif
+
+ total_seismos = total_seismos + 1
+
+ ! save three components of displacement vector
+ irecord = 1
+
+ ! writes out this seismogram
+ call write_one_seismogram(one_seismogram,irec, &
+ station_name,network_name,nrec, &
+ DT,t0,it,NSTEP,SIMULATION_TYPE, &
+ myrank,irecord,component,LOCAL_PATH)
+
+ enddo ! nrec_local_received
+ endif ! if(nrec_local_received > 0 )
+ enddo ! NPROCTOT-1
+
+ write(IMAIN,*) 'Component: .sem'//component
+ write(IMAIN,*) ' total number of receivers saved is ',total_seismos,' out of ',nrec
+ write(IMAIN,*)
+
+ if(total_seismos /= nrec) call exit_MPI(myrank,'incorrect total number of receivers saved')
+
+ else ! on the nodes, send the seismograms to the master
+ receiver = 0
+ call send_i(nrec_local,1,receiver,itag)
+ if (nrec_local > 0) then
+ do irec_local = 1,nrec_local
+ ! get global number of that receiver
+ irec = number_receiver_global(irec_local)
+ call send_i(irec,1,receiver,itag)
+
+ ! sends seismogram of that receiver
+ one_seismogram(:,:) = seismograms(:,irec_local,:)
+ call sendv_cr(one_seismogram,NDIM*NSTEP,receiver,itag)
+ enddo
+ endif
+ endif ! myrank
+
+ endif ! WRITE_SEISMOGRAMS_BY_MASTER
+
+ deallocate(one_seismogram)
+
+ end subroutine write_seismograms_to_file
+
+!=====================================================================
+
+ subroutine write_one_seismogram(one_seismogram,irec, &
+ station_name,network_name,nrec, &
+ DT,t0,it,NSTEP,SIMULATION_TYPE, &
+ myrank,irecord,component,LOCAL_PATH)
+
+ implicit none
+
+ include "constants.h"
+
+ integer :: NSTEP,it,SIMULATION_TYPE
+ real(kind=CUSTOM_REAL), dimension(NDIM,NSTEP) :: one_seismogram
+
+ integer myrank,irecord
+ double precision t0,DT
+
+ integer :: nrec,irec
+ character(len=MAX_LENGTH_STATION_NAME), dimension(nrec) :: station_name
+ character(len=MAX_LENGTH_NETWORK_NAME), dimension(nrec) :: network_name
+ character(len=1) component
+ character(len=256) LOCAL_PATH
+
+ ! local parameters
+ integer iorientation
+ integer length_station_name,length_network_name
+ character(len=256) sisname,clean_LOCAL_PATH,final_LOCAL_PATH
+ character(len=3) channel
+
+ ! loops over each seismogram component
+ do iorientation = 1,NDIM
+
+ ! gets channel name
+ call write_channel_name(iorientation,channel)
+
+ ! create the name of the seismogram file for each slice
+ ! file name includes the name of the station, the network and the component
+ length_station_name = len_trim(station_name(irec))
+ length_network_name = len_trim(network_name(irec))
+
+ ! check that length conforms to standard
+ if(length_station_name < 1 .or. length_station_name > MAX_LENGTH_STATION_NAME) &
+ call exit_MPI(myrank,'wrong length of station name')
+
+ if(length_network_name < 1 .or. length_network_name > MAX_LENGTH_NETWORK_NAME) &
+ call exit_MPI(myrank,'wrong length of network name')
+
+ write(sisname,"(a,'.',a,'.',a3,'.sem',a1)") station_name(irec)(1:length_station_name),&
+ network_name(irec)(1:length_network_name),channel,component
+
+ ! directory to store seismograms
+ if( USE_OUTPUT_FILES_PATH ) then
+ final_LOCAL_PATH = OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH)) // '/'
+ else
+ ! suppress white spaces if any
+ clean_LOCAL_PATH = adjustl(LOCAL_PATH)
+ ! create full final local path
+ final_LOCAL_PATH = clean_LOCAL_PATH(1:len_trim(clean_LOCAL_PATH)) // '/'
+ endif
+
+ ! ASCII output format
+ call write_output_ASCII(one_seismogram, &
+ NSTEP,it,SIMULATION_TYPE,DT,t0,myrank, &
+ iorientation,irecord,sisname,final_LOCAL_PATH)
+
+ enddo ! do iorientation
+
+ end subroutine write_one_seismogram
+
+!=====================================================================
+
+! write adjoint seismograms (displacement) to text files
+
+ subroutine write_adj_seismograms_to_file(myrank,seismograms,number_receiver_global, &
+ nrec_local,it,DT,NSTEP,t0,LOCAL_PATH,istore)
+
+ implicit none
+
+ include "constants.h"
+
+ integer nrec_local,NSTEP,it,myrank,istore
+ integer, dimension(nrec_local) :: number_receiver_global
+ real(kind=CUSTOM_REAL), dimension(NDIM,nrec_local,NSTEP) :: seismograms
+ double precision t0,DT
+ character(len=256) LOCAL_PATH
+
+
+ integer irec,irec_local
+ integer iorientation,irecord,isample
+
+ character(len=3) channel
+ character(len=1) component
+ character(len=256) sisname,clean_LOCAL_PATH,final_LOCAL_PATH
+
+! save displacement, velocity or acceleration
+ if(istore == 1) then
+ component = 'd'
+ else if(istore == 2) then
+ component = 'v'
+ else if(istore == 3) then
+ component = 'a'
+ else
+ call exit_MPI(myrank,'wrong component to save for seismograms')
+ endif
+
+ do irec_local = 1,nrec_local
+
+ ! get global number of that receiver
+ irec = number_receiver_global(irec_local)
+
+ ! save three components of displacement vector
+ irecord = 1
+
+ do iorientation = 1,NDIM
+
+ ! gets channel name
+ call write_channel_name(iorientation,channel)
+
+ ! create the name of the seismogram file for each slice
+ ! file name includes the name of the station, the network and the component
+ write(sisname,"(a,i5.5,'.',a,'.',a3,'.sem',a1)") 'S',irec_local,&
+ 'NT',channel,component
+
+ ! directory to store seismograms
+ if( USE_OUTPUT_FILES_PATH ) then
+ final_LOCAL_PATH = OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH)) // '/'
+ else
+ ! suppress white spaces if any
+ clean_LOCAL_PATH = adjustl(LOCAL_PATH)
+ ! create full final local path
+ final_LOCAL_PATH = clean_LOCAL_PATH(1:len_trim(clean_LOCAL_PATH)) // '/'
+ endif
+
+
+ ! save seismograms in text format with no subsampling.
+ ! Because we do not subsample the output, this can result in large files
+ ! if the simulation uses many time steps. However, subsampling the output
+ ! here would result in a loss of accuracy when one later convolves
+ ! the results with the source time function
+ open(unit=IOUT,file=final_LOCAL_PATH(1:len_trim(final_LOCAL_PATH))//sisname(1:len_trim(sisname)),status='unknown')
+
+ ! make sure we never write more than the maximum number of time steps
+ ! subtract half duration of the source to make sure travel time is correct
+ do isample = 1,min(it,NSTEP)
+ if(irecord == 1) then
+ ! distinguish between single and double precision for reals
+ if(CUSTOM_REAL == SIZE_REAL) then
+ write(IOUT,*) sngl(dble(isample-1)*DT - t0),' ',seismograms(iorientation,irec_local,isample)
+ else
+ write(IOUT,*) dble(isample-1)*DT - t0,' ',seismograms(iorientation,irec_local,isample)
+ endif
+ else
+ call exit_MPI(myrank,'incorrect record label')
+ endif
+ enddo
+
+ close(IOUT)
+
+ enddo
+
+ enddo
+
+ end subroutine write_adj_seismograms_to_file
+
+!=====================================================================
+
+! write adjoint seismograms (strain) to text files
+
+ subroutine write_adj_seismograms2_to_file(myrank,seismograms,number_receiver_global, &
+ nrec_local,it,DT,NSTEP,t0,LOCAL_PATH)
+
+ implicit none
+
+ include "constants.h"
+
+ integer nrec_local,NSTEP,it,myrank
+ integer, dimension(nrec_local) :: number_receiver_global
+ real(kind=CUSTOM_REAL), dimension(NDIM,NDIM,nrec_local,NSTEP) :: seismograms
+ double precision t0,DT
+ character(len=256) LOCAL_PATH
+
+
+ integer irec,irec_local
+ integer idim,jdim,irecord,isample
+
+ character(len=4) chn
+ character(len=1) component
+ character(len=256) sisname,clean_LOCAL_PATH,final_LOCAL_PATH
+
+ do irec_local = 1,nrec_local
+
+ ! get global number of that receiver
+ irec = number_receiver_global(irec_local)
+
+ ! save three components of displacement vector
+ irecord = 1
+
+ do idim = 1, 3
+ do jdim = idim, 3
+
+ if(idim == 1 .and. jdim == 1) then
+ chn = 'SNN'
+ else if(idim == 1 .and. jdim == 2) then
+ chn = 'SEN'
+ else if(idim == 1 .and. jdim == 3) then
+ chn = 'SEZ'
+ else if(idim == 2 .and. jdim == 2) then
+ chn = 'SEE'
+ else if(idim == 2 .and. jdim == 3) then
+ chn = 'SNZ'
+ else if(idim == 3 .and. jdim == 3) then
+ chn = 'SZZ'
+ else
+ call exit_MPI(myrank,'incorrect channel value')
+ endif
+
+ ! create the name of the seismogram file for each slice
+ ! file name includes the name of the station, the network and the component
+ write(sisname,"(a,i5.5,'.',a,'.',a3,'.sem',a1)") 'S',irec_local,&
+ 'NT',chn,component
+
+ ! directory to store seismograms
+ if( USE_OUTPUT_FILES_PATH ) then
+ final_LOCAL_PATH = OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH)) // '/'
+ else
+ ! suppress white spaces if any
+ clean_LOCAL_PATH = adjustl(LOCAL_PATH)
+ ! create full final local path
+ final_LOCAL_PATH = clean_LOCAL_PATH(1:len_trim(clean_LOCAL_PATH)) // '/'
+ endif
+
+ ! save seismograms in text format with no subsampling.
+ ! Because we do not subsample the output, this can result in large files
+ ! if the simulation uses many time steps. However, subsampling the output
+ ! here would result in a loss of accuracy when one later convolves
+ ! the results with the source time function
+ open(unit=IOUT,file=final_LOCAL_PATH(1:len_trim(final_LOCAL_PATH))//sisname(1:len_trim(sisname)),status='unknown')
+
+ ! make sure we never write more than the maximum number of time steps
+ ! subtract half duration of the source to make sure travel time is correct
+ do isample = 1,min(it,NSTEP)
+ if(irecord == 1) then
+ ! distinguish between single and double precision for reals
+ if(CUSTOM_REAL == SIZE_REAL) then
+ write(IOUT,*) sngl(dble(isample-1)*DT - t0),' ',seismograms(jdim,idim,irec_local,isample)
+ else
+ write(IOUT,*) dble(isample-1)*DT - t0,' ',seismograms(jdim,idim,irec_local,isample)
+ endif
+ else
+ call exit_MPI(myrank,'incorrect record label')
+ endif
+ enddo
+
+ close(IOUT)
+
+ enddo ! jdim
+ enddo ! idim
+ enddo ! irec_local
+
+end subroutine write_adj_seismograms2_to_file
+
+!=====================================================================
+
+subroutine write_channel_name(iorientation,channel)
+
+ use specfem_par,only: DT,SUPPRESS_UTM_PROJECTION
+ implicit none
+
+ integer :: iorientation
+ character(len=3) :: channel
+
+ ! local parameters
+ character(len=2) :: bic
+ double precision:: sampling_rate
+
+ ! gets band and instrument code
+ sampling_rate = DT
+ call band_instrument_code(sampling_rate,bic)
+
+ ! sets channel name
+ if( SUPPRESS_UTM_PROJECTION ) then
+
+ ! no UTM, pure Cartesian reference
+ ! uses Cartesian X/Y/Z direction to denote channel
+ select case(iorientation)
+ case(1)
+ channel = bic(1:2)//'X'
+ case(2)
+ channel = bic(1:2)//'Y'
+ case(3)
+ channel = bic(1:2)//'Z'
+ case default
+ call exit_mpi(0,'error channel orientation value')
+ end select
+
+ else
+
+ ! UTM conversion
+ ! uses convention for N/E/Z to denote channel
+ select case(iorientation)
+ case(1)
+ channel = bic(1:2)//'E'
+ case(2)
+ channel = bic(1:2)//'N'
+ case(3)
+ channel = bic(1:2)//'Z'
+ case default
+ call exit_mpi(0,'error channel orientation value')
+ end select
+
+ endif
+
+end subroutine write_channel_name
+
+!=====================================================================
+
+subroutine band_instrument_code(DT,bic)
+ ! This subroutine is to choose the appropriate band and instrument codes for channel names of seismograms
+ ! based on the IRIS convention (first two letters of channel codes, respectively,
+ ! which were LH(Z/E/N) previously).
+ ! For consistency with observed data, we now use the IRIS convention for band codes (first letter in channel codes) of
+ ! SEM seismograms governed by their sampling rate.
+ ! Instrument code (second letter in channel codes) is fixed to "X" which is assigned by IRIS for synthetic seismograms.
+ ! See the manual for further explanations!
+ ! Ebru, November 2010
+ implicit none
+ double precision :: DT
+ character(len=2) :: bic
+ ! local parameter
+ logical,parameter :: SUPPRESS_IRIS_CONVENTION = .false.
+
+ ! see manual for ranges
+ if (DT .ge. 1.0d0) bic = 'LX'
+ if (DT .lt. 1.0d0 .and. DT .gt. 0.1d0) bic = 'MX'
+ if (DT .le. 0.1d0 .and. DT .gt. 0.0125d0) bic = 'BX'
+ if (DT .le. 0.0125d0 .and. DT .gt. 0.004d0) bic = 'HX'
+ if (DT .le. 0.004d0 .and. DT .gt. 0.001d0) bic = 'CX'
+ if (DT .le. 0.001d0) bic = 'FX'
+
+ ! ignores IRIS convention, uses previous, constant band and instrument code
+ if( SUPPRESS_IRIS_CONVENTION ) then
+ bic = 'BH'
+ endif
+
+ end subroutine band_instrument_code
+
+!=====================================================================
+
+ subroutine write_seismograms_su()
+
+ use specfem_par
+ use specfem_par_acoustic
+ use specfem_par_elastic
+ use specfem_par_poroelastic
+
+ implicit none
+
+ character(len=256) procname,final_LOCAL_PATH
+ integer :: irec_local,irec,ios
+ real :: x_station,y_station
+
+ ! headers
+ integer,parameter :: nheader=240 ! 240 bytes
+ integer(kind=2) :: i2head(nheader/2) ! 2-byte-integer
+ integer(kind=4) :: i4head(nheader/4) ! 4-byte-integer
+ real(kind=4) :: r4head(nheader/4) ! 4-byte-real
+ equivalence (i2head,i4head,r4head) ! share the same 240-byte-memory
+
+ double precision, allocatable, dimension(:) :: stlat,stlon,stele,stbur,stutm_x,stutm_y,elevation
+ double precision, allocatable, dimension(:) :: x_found,y_found,z_found
+ double precision :: x_found_source,y_found_source,z_found_source
+
+ allocate(x_found(nrec))
+ allocate(y_found(nrec))
+ allocate(z_found(nrec))
+ open(unit=IIN_SU1,file=trim(OUTPUT_FILES)//'/output_list_stations.txt',status='unknown')
+ do irec=1,nrec
+ read(IIN_SU1,*) x_found(irec),y_found(irec),z_found(irec)
+ enddo
+ close(IIN_SU1)
+ open(unit=IIN_SU1,file=trim(OUTPUT_FILES)//'/output_list_sources.txt',status='unknown')
+ read(IIN_SU1,*) x_found_source,y_found_source,z_found_source
+ close(IIN_SU1)
+ ! directory to store seismograms
+ if( USE_OUTPUT_FILES_PATH ) then
+ final_LOCAL_PATH = OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH)) // '/'
+ else
+ ! create full final local path
+ final_LOCAL_PATH = trim(adjustl(LOCAL_PATH)) // '/'
+ endif
+ write(procname,"(i4)") myrank
+
+ ! write seismograms (dx)
+ open(unit=IOUT_SU, file=trim(adjustl(final_LOCAL_PATH))//trim(adjustl(procname))//'_dx_SU' ,&
+ form='unformatted', access='direct', recl=240+4*(NSTEP))
+ do irec_local = 1,nrec_local
+ irec = number_receiver_global(irec_local)
+ i4head(1) =irec
+ i4head(11) =z_found(irec)
+ i4head(13) =z_found_source
+ i4head(19) =x_found_source !utm_x_source(1)
+ i4head(20) =y_found_source !utm_y_source(1)
+ i4head(21) =x_found(irec) !stutm_x(irec)
+ i4head(22) =y_found(irec) !stutm_y(irec)
+ i2head(58) =NSTEP
+ i2head(59) =DT*1.0d6
+ write(IOUT_SU,rec=irec_local) r4head, seismograms_d(1,irec_local,:)
+ enddo
+ close(IOUT_SU)
+ ! write seismograms (dy)
+ open(unit=IOUT_SU, file=trim(adjustl(final_LOCAL_PATH))//trim(adjustl(procname))//'_dy_SU' ,&
+ form='unformatted', access='direct', recl=240+4*(NSTEP))
+ do irec_local = 1,nrec_local
+ irec = number_receiver_global(irec_local)
+ i4head(1) =irec
+ i4head(11) =z_found(irec)
+ i4head(13) =z_found_source
+ i4head(19) =x_found_source !utm_x_source(1)
+ i4head(20) =y_found_source !utm_y_source(1)
+ i4head(21) =x_found(irec) !stutm_x(irec)
+ i4head(22) =y_found(irec) !stutm_y(irec)
+ i2head(58) =NSTEP
+ i2head(59) =DT*1.0d6
+ write(IOUT_SU,rec=irec_local) r4head, seismograms_d(2,irec_local,:)
+ enddo
+ close(IOUT_SU)
+
+ ! write seismograms (dz)
+ open(unit=IOUT_SU, file=trim(adjustl(final_LOCAL_PATH))//trim(adjustl(procname))//'_dz_SU' ,&
+ form='unformatted', access='direct', recl=240+4*(NSTEP))
+ do irec_local = 1,nrec_local
+ irec = number_receiver_global(irec_local)
+ i4head(1) =irec
+ i4head(11) =z_found(irec)
+ i4head(13) =z_found_source
+ i4head(19) =x_found_source !utm_x_source(1)
+ i4head(20) =y_found_source !utm_y_source(1)
+ i4head(21) =x_found(irec) !stutm_x(irec)
+ i4head(22) =y_found(irec) !stutm_y(irec)
+ i2head(58) =NSTEP
+ i2head(59) =DT*1.0d6
+ write(IOUT_SU,rec=irec_local) r4head, seismograms_d(3,irec_local,:)
+ enddo
+ close(IOUT_SU)
+
+ end subroutine write_seismograms_su
+
Deleted: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_seismograms_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_seismograms_cuda.cu 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_seismograms_cuda.cu 2011-10-06 03:31:24 UTC (rev 19027)
@@ -1,168 +0,0 @@
-#include <stdio.h>
-#include <cuda.h>
-#include <cublas.h>
-#include <mpi.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#include "mesh_constants_cuda.h"
-
-#define INDEX2(xsize,x,y) x + (y)*xsize
-#define INDEX3(xsize,ysize,x,y,z) x + (y)*xsize + (z)*xsize*ysize
-#define INDEX4(xsize,ysize,zsize,x,y,z,i) x + (y)*xsize + (z)*xsize*ysize + (i)*xsize*ysize*zsize
-#define INDEX5(xsize,ysize,zsize,isize,x,y,z,i,j) x + (y)*xsize + (z)*xsize*ysize + (i)*xsize*ysize*zsize + (j)*xsize*ysize*zsize*isize
-
-
-#define ENABLE_VERY_SLOW_ERROR_CHECKING
-
-
-__global__ void transfer_stations_fields_from_device_kernel(int* number_receiver_global,
- int* ispec_selected_rec,
- int* ibool,
- float* station_seismo_field,
- float* desired_field,
- int nrec_local,int* debug_index) {
- int blockID = blockIdx.x + blockIdx.y*gridDim.x;
- if(blockID<nrec_local) {
- int nodeID = threadIdx.x + blockID*blockDim.x;
- int irec = number_receiver_global[blockID]-1;
- int ispec = ispec_selected_rec[irec]-1; // ispec==0 before -1???
- // if(threadIdx.x==1 && blockID < 125) {
- // // debug_index[threadIdx.x] = threadIdx.x + 125*ispec;
- // debug_index[blockID] = ispec;
- // debug_index[blockID + 4] = irec;
- // debug_index[blockID + 8] = ispec_selected_rec[0];
- // debug_index[blockID + 9] = ispec_selected_rec[1];
- // debug_index[blockID +10] = ispec_selected_rec[2];
- // debug_index[blockID +11] = ispec_selected_rec[3];
- // debug_index[blockID +12] = ispec_selected_rec[4];
- // }
- int iglob = ibool[threadIdx.x + 125*ispec]-1;
- station_seismo_field[3*125*blockID + 3*threadIdx.x+0] = desired_field[3*iglob];
- station_seismo_field[3*125*blockID + 3*threadIdx.x+1] = desired_field[3*iglob+1];
- station_seismo_field[3*125*blockID + 3*threadIdx.x+2] = desired_field[3*iglob+2];
- }
-}
-
-extern "C" void pause_for_debuger(int);
-
-void transfer_field_from_device(Mesh* mp, float* d_field,float* h_field,
- int* number_receiver_global,
- int* d_ispec_selected,
- int* h_ispec_selected,
- int* ibool) {
-
- int blocksize = 125;
- int num_blocks_x = mp->nrec_local;
- int num_blocks_y = 1;
- int myrank;
- MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
- while(num_blocks_x > 65535) {
- num_blocks_x = ceil(num_blocks_x/2.0);
- num_blocks_y = num_blocks_y*2;
- }
-
- dim3 grid(num_blocks_x,num_blocks_y);
- dim3 threads(blocksize,1,1);
-
- int* d_debug_index,*h_debug_index;
- //cudaMalloc((void**)&d_debug_index,125*sizeof(int));
- //h_debug_index = (int*)calloc(125,sizeof(int));
- //cudaMemcpy(d_debug_index,h_debug_index,125*sizeof(int),cudaMemcpyHostToDevice);
-
-
- // prepare field transfer array on device
- transfer_stations_fields_from_device_kernel<<<grid,threads>>>(mp->d_number_receiver_global,
- d_ispec_selected,
- mp->d_ibool,
- mp->d_station_seismo_field,
- d_field,
- mp->nrec_local,d_debug_index);
-
- //cudaMemcpy(h_debug_index,d_debug_index,125*sizeof(int),cudaMemcpyDeviceToHost);
-
- // pause_for_debug(1);
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("transfer_stations_fields_from_device_kernel");
-
- // // sync and check to catch errors from previous async operations
- // cudaThreadSynchronize();
- // cudaError_t err = cudaGetLastError();
- // if (err != cudaSuccess)
- // {
- // fprintf(stderr,"Error launching/running transfer_stations_fields_from_device_kernel: %s\n", cudaGetErrorString(err));
- // exit(1);
- // }
-#endif
-
- cudaMemcpy(mp->h_station_seismo_field,mp->d_station_seismo_field,
- (3*125)*(mp->nrec_local)*sizeof(float),cudaMemcpyDeviceToHost);
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- // sync and check to catch errors from previous async operations
- exit_on_cuda_error("transfer_stations_fields_from_device_kernel_memcpy");
- // cudaThreadSynchronize();
- // err = cudaGetLastError();
- // if (err != cudaSuccess)
- // {
- // fprintf(stderr,"Error launching/running transfer_stations_fields_from_device_kernel_memcpy: %s\n", cudaGetErrorString(err));
- // exit(1);
- // }
-#endif
-
- // pause_for_debug(1);
- int irec_local;
-
- for(irec_local=0;irec_local<mp->nrec_local;irec_local++) {
- int irec = number_receiver_global[irec_local]-1;
- int ispec = h_ispec_selected[irec]-1;
-
- for(int i=0;i<125;i++) {
- int iglob = ibool[i+125*ispec]-1;
- h_field[0+3*iglob] = mp->h_station_seismo_field[0+3*i+irec_local*125*3];
- h_field[1+3*iglob] = mp->h_station_seismo_field[1+3*i+irec_local*125*3];
- h_field[2+3*iglob] = mp->h_station_seismo_field[2+3*i+irec_local*125*3];
- }
-
- }
-}
-
-extern "C" void transfer_station_fields_from_device_(float* displ,float* veloc,float* accel,
- float* b_displ, float* b_veloc, float* b_accel,
- long* Mesh_pointer_f,int* number_receiver_global,
- int* ispec_selected_rec,int* ispec_selected_source,
- int* ibool,int* SIMULATION_TYPEf) {
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
-
-
- int SIMULATION_TYPE = *SIMULATION_TYPEf;
-
- if(SIMULATION_TYPE == 1) {
- transfer_field_from_device(mp,mp->d_displ,displ, number_receiver_global,
- mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
- transfer_field_from_device(mp,mp->d_veloc,veloc, number_receiver_global,
- mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
- transfer_field_from_device(mp,mp->d_accel,accel, number_receiver_global,
- mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
- }
- else if(SIMULATION_TYPE == 2) {
- transfer_field_from_device(mp,mp->d_displ,displ, number_receiver_global,
- mp->d_ispec_selected_source, ispec_selected_source, ibool);
- transfer_field_from_device(mp,mp->d_veloc,veloc, number_receiver_global,
- mp->d_ispec_selected_source, ispec_selected_source, ibool);
- transfer_field_from_device(mp,mp->d_accel,accel, number_receiver_global,
- mp->d_ispec_selected_source, ispec_selected_source, ibool);
- }
- else if(SIMULATION_TYPE == 3) {
- transfer_field_from_device(mp,mp->d_b_displ,b_displ, number_receiver_global,
- mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
- transfer_field_from_device(mp,mp->d_b_veloc,b_veloc, number_receiver_global,
- mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
- transfer_field_from_device(mp,mp->d_b_accel,b_accel, number_receiver_global,
- mp->d_ispec_selected_rec, ispec_selected_rec, ibool);
- }
-
-}
-
-// extern "C" void save_seismogram(char*
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/create_movie_GMT/Par_file
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/create_movie_GMT/Par_file 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/create_movie_GMT/Par_file 2011-10-06 03:31:24 UTC (rev 19027)
@@ -65,4 +65,6 @@
# print source time function
PRINT_SOURCE_TIME_FUNCTION = .false.
+# set to true to use GPUs
+GPU_MODE = .false.
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/files_needed_asteroid/DATA/Par_file
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/files_needed_asteroid/DATA/Par_file 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/files_needed_asteroid/DATA/Par_file 2011-10-06 03:31:24 UTC (rev 19027)
@@ -61,3 +61,5 @@
# print source time function
PRINT_SOURCE_TIME_FUNCTION = .false.
+# set to true to use GPUs
+GPU_MODE = .false.
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/from_old_DATA/par_files/Par_file_Carcione_copper_ori_r2d2_serial
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/from_old_DATA/par_files/Par_file_Carcione_copper_ori_r2d2_serial 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/from_old_DATA/par_files/Par_file_Carcione_copper_ori_r2d2_serial 2011-10-06 03:31:24 UTC (rev 19027)
@@ -61,3 +61,5 @@
# print source time function
PRINT_SOURCE_TIME_FUNCTION = .false.
+# set to true to use GPUs
+GPU_MODE = .false.
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/from_old_DATA/par_files/Par_file_Carcione_copper_very_small_serial
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/from_old_DATA/par_files/Par_file_Carcione_copper_very_small_serial 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/from_old_DATA/par_files/Par_file_Carcione_copper_very_small_serial 2011-10-06 03:31:24 UTC (rev 19027)
@@ -61,3 +61,5 @@
# print source time function
PRINT_SOURCE_TIME_FUNCTION = .false.
+# set to true to use GPUs
+GPU_MODE = .false.
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/from_old_DATA/par_files/Par_file_EGEE_gros
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/from_old_DATA/par_files/Par_file_EGEE_gros 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/from_old_DATA/par_files/Par_file_EGEE_gros 2011-10-06 03:31:24 UTC (rev 19027)
@@ -61,3 +61,5 @@
# print source time function
PRINT_SOURCE_TIME_FUNCTION = .false.
+# set to true to use GPUs
+GPU_MODE = .false.
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/from_old_DATA/par_files/Par_file_EGEE_petit
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/from_old_DATA/par_files/Par_file_EGEE_petit 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/from_old_DATA/par_files/Par_file_EGEE_petit 2011-10-06 03:31:24 UTC (rev 19027)
@@ -61,3 +61,5 @@
# print source time function
PRINT_SOURCE_TIME_FUNCTION = .false.
+# set to true to use GPUs
+GPU_MODE = .false.
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/from_old_DATA/par_files/Par_file_EGEE_small_4
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/from_old_DATA/par_files/Par_file_EGEE_small_4 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/from_old_DATA/par_files/Par_file_EGEE_small_4 2011-10-06 03:31:24 UTC (rev 19027)
@@ -64,3 +64,5 @@
# print source time function
PRINT_SOURCE_TIME_FUNCTION = .false.
+# set to true to use GPUs
+GPU_MODE = .false.
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/from_old_DATA/par_files/Par_file_SC_288
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/from_old_DATA/par_files/Par_file_SC_288 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/from_old_DATA/par_files/Par_file_SC_288 2011-10-06 03:31:24 UTC (rev 19027)
@@ -65,4 +65,6 @@
# print source time function
PRINT_SOURCE_TIME_FUNCTION = .false.
+# set to true to use GPUs
+GPU_MODE = .false.
Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/from_old_DATA/par_files/Par_file_SC_384
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/from_old_DATA/par_files/Par_file_SC_384 2011-10-06 02:48:17 UTC (rev 19026)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/unused_routines/from_old_DATA/par_files/Par_file_SC_384 2011-10-06 03:31:24 UTC (rev 19027)
@@ -65,4 +65,6 @@
# print source time function
PRINT_SOURCE_TIME_FUNCTION = .false.
+# set to true to use GPUs
+GPU_MODE = .false.
More information about the CIG-COMMITS
mailing list