[cig-commits] r19680 - in seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER: . examples/homogeneous_halfspace m4 src/cuda src/generate_databases src/shared src/specfem3D utils

danielpeter at geodynamics.org danielpeter at geodynamics.org
Fri Feb 24 17:37:34 PST 2012


Author: danielpeter
Date: 2012-02-24 17:37:33 -0800 (Fri, 24 Feb 2012)
New Revision: 19680

Added:
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/m4/cit_openmp.m4
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.F90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/create_specfem3D_gpu_cuda_method_stubs.pl
Removed:
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90
Modified:
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/configure
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/configure.ac
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/homogeneous_halfspace/process.sh
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_coupling_cuda.cu
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_acoustic_cuda.cu
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_elastic_cuda.cu
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/mesh_constants_cuda.h
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/noise_tomography_cuda.cu
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/transfer_fields_cuda.cu
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_perm_color.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/check_mesh_resolution.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/detect_surface.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/serial.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/Makefile.in
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_elastic.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_acoustic.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic_Dev_openmp.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_kernels.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/initialize_simulation.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/iterate_time.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/noise_tomography.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/read_mesh_databases.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D_par.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_seismograms.f90
Log:
updates configuration files to use ./configure --with-openmp for OpenMP version; renames file endings to compute_forces_elastic.F90 and prepare_timerun.F90; updates preparations to use openmp routine also without mesh coloring

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/configure
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/configure	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/configure	2012-02-25 01:37:33 UTC (rev 19680)
@@ -639,11 +639,13 @@
 
 ac_subst_vars='LTLIBOBJS
 LIBOBJS
+NVCC
 PYTHON_EGG_LDFLAGS
 PYTHON_EGG_CPPFLAGS
 PYTHON_EGG_CFLAGS
 PYTHONPATH
 LOCAL_PATH_IS_ALSO_GLOBAL
+OPENMP_LIB
 MPI_INC
 CUDA_INC
 CUDA_LIB
@@ -711,6 +713,8 @@
 PYTHON_PREFIX
 PYTHON_VERSION
 PYTHON
+COND_OPENMP_FALSE
+COND_OPENMP_TRUE
 COND_CUDA_FALSE
 COND_CUDA_TRUE
 CUSTOM_MPI_TYPE
@@ -763,6 +767,7 @@
 with_mpi
 enable_double_precision
 with_cuda
+with_openmp
 with_scotch_dir
 with_scotch_includedir
 with_scotch_libdir
@@ -792,6 +797,7 @@
 CUDA_LIB
 CUDA_INC
 MPI_INC
+OPENMP_LIB
 LOCAL_PATH_IS_ALSO_GLOBAL
 PYTHON
 PYTHONPATH'
@@ -1429,6 +1435,7 @@
   --with-pyre             build Pyrized version [default=no]
   --with-mpi              build parallel version [default=yes]
   --with-cuda             build cuda GPU enabled version [default=no]
+  --with-openmp           build OpenMP enabled version [default=no]
   --with-scotch-dir       define the root path to Scotch (e.g. /opt/scotch/)
   --with-scotch-includedir
                           define the path to the Scotch headers (e.g.
@@ -1470,6 +1477,7 @@
   CUDA_INC    Location of CUDA include files
   MPI_INC     Location of MPI include mpi.h, which is needed by nvcc when
               compiling cuda files
+  OPENMP_LIB  Location of extra OpenMP libraries
   LOCAL_PATH_IS_ALSO_GLOBAL
               files on a local path on each node are also seen as global with
               same path [default=true]
@@ -2001,6 +2009,23 @@
 
 
 
+# Check whether --with-openmp was given.
+if test "${with_openmp+set}" = set; then
+  withval=$with_openmp; want_openmp="$withval"
+else
+  want_openmp=no
+fi
+
+ if test "$want_openmp" = yes; then
+  COND_OPENMP_TRUE=
+  COND_OPENMP_FALSE='#'
+else
+  COND_OPENMP_TRUE='#'
+  COND_OPENMP_FALSE=
+fi
+
+
+
 # Checks for programs.
 
 if test "$want_pyre" = yes; then
@@ -7423,6 +7448,7 @@
 fi
 
 
+
 if test x"$LOCAL_PATH_IS_ALSO_GLOBAL" = x; then
     LOCAL_PATH_IS_ALSO_GLOBAL=true
 fi
@@ -7848,10 +7874,163 @@
 
 fi
 
+if test "$want_cuda" = yes; then
 
+  # Extract the first word of "nvcc", so it can be a program name with args.
+set dummy nvcc; ac_word=$2
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_path_NVCC+set}" = set; then
+  $as_echo_n "(cached) " >&6
+else
+  case $NVCC in
+  [\\/]* | ?:[\\/]*)
+  ac_cv_path_NVCC="$NVCC" # Let the user override the test with a path.
+  ;;
+  *)
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_path_NVCC="$as_dir/$ac_word$ac_exec_ext"
+    $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+done
+IFS=$as_save_IFS
+
+  ;;
+esac
+fi
+NVCC=$ac_cv_path_NVCC
+if test -n "$NVCC"; then
+  { $as_echo "$as_me:$LINENO: result: $NVCC" >&5
+$as_echo "$NVCC" >&6; }
+else
+  { $as_echo "$as_me:$LINENO: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  if test -z "$NVCC" ; then
+    { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+{ { $as_echo "$as_me:$LINENO: error: cannot find 'nvcc' program.
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: cannot find 'nvcc' program.
+See \`config.log' for more details." >&2;}
+   { (exit 1); exit 1; }; }; }
+    NVCC=`echo "Error: nvcc is not installed." ; false`
+  fi
+
+fi
+
+if test "$want_openmp" = yes; then
+
+ac_ext=${ac_fc_srcext-f}
+ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
+ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_fc_compiler_gnu
+
+cit_fc_save_fc=$FC
+cit_fc_save_fcflags=$FCFLAGS
+FC=$FC
+FCFLAGS="$FCFLAGS $FLAGS_NO_CHECK"
+
+{ $as_echo "$as_me:$LINENO: checking whether OpenMP directives work" >&5
+$as_echo_n "checking whether OpenMP directives work... " >&6; }
+
+#AC_COMPILE_IFELSE(_CIT_FC_TRIVIAL_OPENMP_PROGRAM, [
+#    AC_MSG_RESULT(yes)
+#], [
+#    AC_MSG_RESULT(no)
+#    AC_MSG_FAILURE([cannot compile a trivial OpenMP program using $FC])
+#])
+
+cat >conftest.$ac_ext <<_ACEOF
+
+      program main
+
+       implicit none
+       integer OMP_get_thread_num
+       integer OMP_GET_MAX_THREADS
+       integer NUM_THREADS
+       integer thread_id
+
+       NUM_THREADS = OMP_GET_MAX_THREADS()
+       !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(thread_id)
+       thread_id = OMP_get_thread_num()+1
+       !$OMP END PARALLEL
+
+      end
+
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_fc_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest$ac_exeext && {
+	 test "$cross_compiling" = yes ||
+	 $as_test_x conftest$ac_exeext
+       }; then
+
+    { $as_echo "$as_me:$LINENO: result: yes" >&5
+$as_echo "yes" >&6; }
+
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+    { $as_echo "$as_me:$LINENO: result: no" >&5
+$as_echo "no" >&6; }
+    { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+{ { $as_echo "$as_me:$LINENO: error: cannot link a trivial OpenMP program using $FC with flags: $FLAGS_NO_CHECK
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: cannot link a trivial OpenMP program using $FC with flags: $FLAGS_NO_CHECK
+See \`config.log' for more details." >&2;}
+   { (exit 1); exit 1; }; }; }
+
+fi
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+      conftest$ac_exeext conftest.$ac_ext
+
+FC=$cit_fc_save_fc
+FCFLAGS=$cit_fc_save_fcflags
+
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+fi
+
+
 # Checks for library functions.
 
-
 ac_config_files="$ac_config_files Makefile in_data_files/Par_file in_data_files/CMTSOLUTION in_data_files/STATIONS src/meshfem3D/Makefile src/meshfem3D/constants.h src/meshfem3D/precision.h src/decompose_mesh_SCOTCH/Makefile src/specfem3D/Makefile src/generate_databases/Makefile src/shared/constants.h src/shared/precision.h src/decompose_mesh_SCOTCH/scotch_5.1.11/src/Makefile.inc src/check_mesh_quality_CUBIT_Abaqus/Makefile"
 
 cat >confcache <<\_ACEOF
@@ -7972,6 +8151,13 @@
 Usually this means the macro was only invoked conditionally." >&2;}
    { (exit 1); exit 1; }; }
 fi
+if test -z "${COND_OPENMP_TRUE}" && test -z "${COND_OPENMP_FALSE}"; then
+  { { $as_echo "$as_me:$LINENO: error: conditional \"COND_OPENMP\" was never defined.
+Usually this means the macro was only invoked conditionally." >&5
+$as_echo "$as_me: error: conditional \"COND_OPENMP\" was never defined.
+Usually this means the macro was only invoked conditionally." >&2;}
+   { (exit 1); exit 1; }; }
+fi
 
 : ${CONFIG_STATUS=./config.status}
 ac_write_fail=0

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/configure.ac
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/configure.ac	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/configure.ac	2012-02-25 01:37:33 UTC (rev 19680)
@@ -54,7 +54,14 @@
     [want_cuda=no])
 AM_CONDITIONAL([COND_CUDA], [test "$want_cuda" = yes])
 
+AC_ARG_WITH([openmp],
+    [AC_HELP_STRING([--with-openmp],
+        [build OpenMP enabled version @<:@default=no@:>@])],
+    [want_openmp="$withval"],
+    [want_openmp=no])
+AM_CONDITIONAL([COND_OPENMP], [test "$want_openmp" = yes])
 
+
 # Checks for programs.
 
 if test "$want_pyre" = yes; then
@@ -357,6 +364,7 @@
 if test x"$MPICC" = x; then
     MPICC=mpicc
 fi
+AC_ARG_VAR(OPENMP_LIB,[Location of extra OpenMP libraries])
 
 AC_ARG_VAR(LOCAL_PATH_IS_ALSO_GLOBAL, [files on a local path on each node are also seen as global with same path @<:@default=true@:>@])
 if test x"$LOCAL_PATH_IS_ALSO_GLOBAL" = x; then
@@ -427,10 +435,17 @@
     CIT_FC_MAIN
 fi
 
+if test "$want_cuda" = yes; then
+    CIT_CUDA_COMPILER
+fi
 
+if test "$want_openmp" = yes; then
+    CIT_FC_OPENMP_MODULE([$FC],[$FLAGS_NO_CHECK])        
+fi
+
+
 # Checks for library functions.
 
-
 AC_CONFIG_FILES([Makefile in_data_files/Par_file in_data_files/CMTSOLUTION in_data_files/STATIONS src/meshfem3D/Makefile src/meshfem3D/constants.h src/meshfem3D/precision.h  src/decompose_mesh_SCOTCH/Makefile src/specfem3D/Makefile src/generate_databases/Makefile src/shared/constants.h src/shared/precision.h src/decompose_mesh_SCOTCH/scotch_5.1.11/src/Makefile.inc src/check_mesh_quality_CUBIT_Abaqus/Makefile])
 AC_OUTPUT
 

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/homogeneous_halfspace/process.sh
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/homogeneous_halfspace/process.sh	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/homogeneous_halfspace/process.sh	2012-02-25 01:37:33 UTC (rev 19680)
@@ -40,9 +40,8 @@
 
 # links executables
 cd bin/
-ln -s ../../../bin/xdecompose_mesh_SCOTCH
-ln -s ../../../bin/xgenerate_databases
-ln -s ../../../bin/xspecfem3D
+rm -f bin/x*
+cp ../../../bin/x* ./
 cd ../
 
 # decomposes mesh

Added: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/m4/cit_openmp.m4
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/m4/cit_openmp.m4	                        (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/m4/cit_openmp.m4	2012-02-25 01:37:33 UTC (rev 19680)
@@ -0,0 +1,56 @@
+# -*- Autoconf -*-
+
+
+## ---------------------------- ##
+## Autoconf macros for Fortran. ##
+## ---------------------------- ##
+
+
+# CIT_FC_OPENMP_MODULE(FC, FCFLAGS)
+# -----------------------------------------------------
+AC_DEFUN([CIT_FC_OPENMP_MODULE], [
+AC_LANG_PUSH(Fortran)
+cit_fc_save_fc=$FC
+cit_fc_save_fcflags=$FCFLAGS
+FC=$1
+FCFLAGS="$FCFLAGS $2"
+
+AC_MSG_CHECKING([whether OpenMP directives work])
+
+#AC_COMPILE_IFELSE(_CIT_FC_TRIVIAL_OPENMP_PROGRAM, [
+#    AC_MSG_RESULT(yes)
+#], [
+#    AC_MSG_RESULT(no)
+#    AC_MSG_FAILURE([cannot compile a trivial OpenMP program using $1])
+#])
+
+AC_LINK_IFELSE(_CIT_FC_TRIVIAL_OPENMP_PROGRAM, [
+    AC_MSG_RESULT(yes)
+], [
+    AC_MSG_RESULT(no)
+    AC_MSG_FAILURE([cannot link a trivial OpenMP program using $1 with flags: $2])
+])
+
+FC=$cit_fc_save_fc
+FCFLAGS=$cit_fc_save_fcflags
+
+
+AC_LANG_POP(Fortran)
+])dnl CIT_FC_OPENMP_MODULE
+
+AC_DEFUN([_CIT_FC_TRIVIAL_OPENMP_PROGRAM], [
+AC_LANG_PROGRAM([], [[
+       implicit none
+       integer OMP_get_thread_num  
+       integer OMP_GET_MAX_THREADS  
+       integer NUM_THREADS
+       integer thread_id
+  
+       NUM_THREADS = OMP_GET_MAX_THREADS()
+       !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(thread_id)
+       thread_id = OMP_get_thread_num()+1
+       !$OMP END PARALLEL
+]])
+])dnl _CIT_FC_TRIVIAL_OPENMP_PROGRAM
+
+dnl end of file

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_coupling_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_coupling_cuda.cu	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_coupling_cuda.cu	2012-02-25 01:37:33 UTC (rev 19680)
@@ -115,11 +115,10 @@
 
 extern "C"
 void FC_FUNC_(compute_coupling_ac_el_cuda,
-              COMPUTE_COUPLING_AC_EL_CUDA)(
-                                            long* Mesh_pointer_f,
-                                            int* phase_is_innerf,
-                                            int* num_coupling_ac_el_facesf,
-                                            int* SIMULATION_TYPEf) {
+              COMPUTE_COUPLING_AC_EL_CUDA)(long* Mesh_pointer_f,
+                                           int* phase_is_innerf,
+                                           int* num_coupling_ac_el_facesf,
+                                           int* SIMULATION_TYPEf) {
   TRACE("compute_coupling_ac_el_cuda");
   //double start_time = get_time();
 
@@ -238,13 +237,13 @@
         // note: uses potential chi such that displacement s = grad(chi),
         //         pressure becomes: p = - kappa ( div( s ) ) = rho ( - dot_dot_chi + g * s )
         //  g only acting in negative z-direction
-        
+
         // daniel: TODO - check gravity and coupling would be displ * nz  correct?
         pressure = rhol*( - potential_dot_dot_acoustic[iglob]
                          + minus_g[iglob] * displ[iglob*3+2] );
 
-        //daniel: TODO - check gravity and coupling  
-        //pressure = - potential_dot_dot_acoustic[iglob] ;          
+        //daniel: TODO - check gravity and coupling
+        //pressure = - potential_dot_dot_acoustic[iglob] ;
         //if( iface == 128 && igll == 5 ){
         //  printf("coupling acoustic: %f %f \n",potential_dot_dot_acoustic[iglob],
         //             minus_g[iglob] * displ[iglob*3+2]);
@@ -252,7 +251,7 @@
 
       }else{
         // no gravity: uses potential chi such that displacement s = 1/rho grad(chi)
-        //                  pressure p = - kappa ( div( s ) ) then becomes: p = - dot_dot_chi 
+        //                  pressure p = - kappa ( div( s ) ) then becomes: p = - dot_dot_chi
         //                  ( multiplied with factor 1/kappa due to setup of equation of motion )
         pressure = - potential_dot_dot_acoustic[iglob];
       }
@@ -278,11 +277,10 @@
 
 extern "C"
 void FC_FUNC_(compute_coupling_el_ac_cuda,
-              COMPUTE_COUPLING_EL_AC_CUDA)(
-                                                 long* Mesh_pointer_f,
-                                                 int* phase_is_innerf,
-                                                 int* num_coupling_ac_el_facesf,
-                                                 int* SIMULATION_TYPEf) {
+              COMPUTE_COUPLING_EL_AC_CUDA)(long* Mesh_pointer_f,
+                                           int* phase_is_innerf,
+                                           int* num_coupling_ac_el_facesf,
+                                           int* SIMULATION_TYPEf) {
   TRACE("compute_coupling_el_ac_cuda");
   //double start_time = get_time();
 

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_acoustic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_acoustic_cuda.cu	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_acoustic_cuda.cu	2012-02-25 01:37:33 UTC (rev 19680)
@@ -412,25 +412,25 @@
       // assumes that g only acts in (negative) z-direction
       kappa_invl = 1.f / d_kappastore[working_element*NGLL3 + tx];
       iglob = d_ibool[working_element*NGLL3 + tx]-1;
-      
+
       // daniel: TODO - check gravity
 //      if( kappa_invl <= 0.0f ){
 //        printf("kappa error: %f %f\n",kappa_invl,d_kappastore[working_element*NGLL3 + tx]);
 //        printf("kappa error: thread %d %d \n",tx,working_element);
-//        asm("trap;");      
+//        asm("trap;");
 //      }
 //      if( iglob <= 0 ){
 //        printf("iglob error: %d %d %d \n",iglob,tx,working_element);
-//        asm("trap;");                
-//      }      
-      
+//        asm("trap;");
+//      }
+
       gravity_term = minus_g[iglob] * kappa_invl * jacobianl * wgll_cube[tx] * dpotentialdzl;
 
       // daniel: TODO - check gravity
       //gravity_term = 0.f;
       //if( iglob == 5 ){
       //  printf("iglob infos: %f %f %f %f %f \n",minus_g[iglob],kappa_invl,jacobianl,wgll_cube[tx],dpotentialdzl);
-      //}      
+      //}
     }
 
     // density (reciproc)

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_elastic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_elastic_cuda.cu	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_elastic_cuda.cu	2012-02-25 01:37:33 UTC (rev 19680)
@@ -488,8 +488,8 @@
   // debug
   //*rho_s_H1 = 0.f;
   //*rho_s_H2 = 0.f;
-  //*rho_s_H3 = 0.f ;  
-    
+  //*rho_s_H3 = 0.f ;
+
 }
 
 /* ----------------------------------------------------------------------------------------------- */

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/mesh_constants_cuda.h
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/mesh_constants_cuda.h	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/mesh_constants_cuda.h	2012-02-25 01:37:33 UTC (rev 19680)
@@ -124,8 +124,8 @@
 
 // cuda kernel block size for updating displacements/potential (newmark time scheme)
 // current hardware: 128 is slightly faster than 256 ( ~ 4%)
-#define BLOCKSIZE_KERNEL1 128 
-#define BLOCKSIZE_KERNEL3 128 
+#define BLOCKSIZE_KERNEL1 128
+#define BLOCKSIZE_KERNEL3 128
 #define BLOCKSIZE_TRANSFER 256
 
 /* ----------------------------------------------------------------------------------------------- */
@@ -240,7 +240,6 @@
   // receivers
   int* d_number_receiver_global;
   int* d_ispec_selected_rec;
-  int* d_islice_selected_rec;
   int nrec_local;
   realw* d_station_seismo_field;
   realw* h_station_seismo_field;

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/noise_tomography_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/noise_tomography_cuda.cu	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/noise_tomography_cuda.cu	2012-02-25 01:37:33 UTC (rev 19680)
@@ -64,7 +64,7 @@
   MPI_Comm_rank(MPI_COMM_WORLD,&procid);
 #else
   procid = 0;
-#endif  
+#endif
   printf("%d: sends msg_id %d\n",procid,*id);
 }
 

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu	2012-02-25 01:37:33 UTC (rev 19680)
@@ -81,8 +81,10 @@
     gethostname(hostname, sizeof(hostname));
     printf("PID %d on %s:%d ready for attach\n", getpid(), hostname,myrank);
     FILE *file = fopen("/scratch/eiger/rietmann/attach_gdb.txt","w+");
-    fprintf(file,"PID %d on %s:%d ready for attach\n", getpid(), hostname,myrank);
-    fclose(file);
+    if (file != NULL){
+      fprintf(file,"PID %d on %s:%d ready for attach\n", getpid(), hostname,myrank);
+      fclose(file);
+    }
     fflush(stdout);
     while (0 == i)
       sleep(5);
@@ -128,7 +130,7 @@
     fflush(stdout);
 #ifdef WITH_MPI
     MPI_Abort(MPI_COMM_WORLD,1);
-#endif    
+#endif
     exit(EXIT_FAILURE);
   }
   return;
@@ -166,9 +168,11 @@
 
   sprintf(filename,"../in_out_files/OUTPUT_FILES/gpu_device_mem_usage_proc_%06d.txt",myrank);
   fp = fopen(filename,"a+");
-  fprintf(fp,"%d: @%s GPU memory usage: used = %f MB, free = %f MB, total = %f MB\n", myrank, info_str,
-   used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
-  fclose(fp);
+  if (fp != NULL){
+    fprintf(fp,"%d: @%s GPU memory usage: used = %f MB, free = %f MB, total = %f MB\n", myrank, info_str,
+            used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
+    fclose(fp);
+  }
 }
 
 /* ----------------------------------------------------------------------------------------------- */
@@ -191,7 +195,7 @@
 
   // show memory usage of GPU
   int myrank;
-#ifdef WITH_MPI  
+#ifdef WITH_MPI
   MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
 #else
   myrank = 0;
@@ -204,15 +208,17 @@
    used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
 
 }
+*/
 
- extern "C"
- void FC_FUNC_(show_free_device_memory,
- SHOW_FREE_DEVICE_MEMORY)() {
+/*
+extern "C"
+void FC_FUNC_(show_free_device_memory,
+              SHOW_FREE_DEVICE_MEMORY)() {
  TRACE("show_free_device_memory");
- 
+
  show_free_memory("from fortran");
- }
- */
+}
+*/
 
 /* ----------------------------------------------------------------------------------------------- */
 
@@ -474,23 +480,23 @@
   int device_count = 0;
   cudaGetDeviceCount(&device_count);
   exit_on_cuda_error("CUDA runtime cudaGetDeviceCount: check if driver and runtime libraries work together\nexiting...\n");
-  
+
   // returns device count to fortran
   if (device_count == 0) exit_on_error("CUDA runtime error: there is no device supporting CUDA\n");
   *ncuda_devices = device_count;
 
 
-  // Sets the active device  
+  // Sets the active device
   if(device_count > 1) {
     // generalized for more GPUs per node
     // note: without previous context release, cudaSetDevice will complain with the cuda error
     //         "setting the device when a process is active is not allowed"
-    // releases previous contexts  
+    // releases previous contexts
     cudaThreadExit();
-    
+
     //printf("rank %d: cuda device count = %d sets device = %d \n",myrank,device_count,myrank % device_count);
     //MPI_Barrier(MPI_COMM_WORLD);
-    
+
     // sets active device
     cudaSetDevice( myrank % device_count );
     exit_on_cuda_error("cudaSetDevice");
@@ -515,47 +521,48 @@
   FILE* fp;
   sprintf(filename,"../in_out_files/OUTPUT_FILES/gpu_device_info_proc_%06d.txt",myrank);
   fp = fopen(filename,"a+");
+  if (fp != NULL){
+    // display device properties
+    fprintf(fp,"Device Name = %s\n",deviceProp.name);
+    fprintf(fp,"multiProcessorCount: %d\n",deviceProp.multiProcessorCount);
+    fprintf(fp,"totalGlobalMem (in MB): %f\n",(unsigned long) deviceProp.totalGlobalMem / (1024.f * 1024.f));
+    fprintf(fp,"totalGlobalMem (in GB): %f\n",(unsigned long) deviceProp.totalGlobalMem / (1024.f * 1024.f * 1024.f));
+    fprintf(fp,"sharedMemPerBlock (in bytes): %lu\n",(unsigned long) deviceProp.sharedMemPerBlock);
+    fprintf(fp,"Maximum number of threads per block: %d\n",deviceProp.maxThreadsPerBlock);
+    fprintf(fp,"Maximum size of each dimension of a block: %d x %d x %d\n",
+            deviceProp.maxThreadsDim[0],deviceProp.maxThreadsDim[1],deviceProp.maxThreadsDim[2]);
+    fprintf(fp,"Maximum sizes of each dimension of a grid: %d x %d x %d\n",
+            deviceProp.maxGridSize[0],deviceProp.maxGridSize[1],deviceProp.maxGridSize[2]);
+    fprintf(fp,"Compute capability of the device = %d.%d\n", deviceProp.major, deviceProp.minor);
+    if(deviceProp.canMapHostMemory){
+      fprintf(fp,"canMapHostMemory: TRUE\n");
+    }else{
+      fprintf(fp,"canMapHostMemory: FALSE\n");
+    }
+    if(deviceProp.deviceOverlap){
+      fprintf(fp,"deviceOverlap: TRUE\n");
+    }else{
+      fprintf(fp,"deviceOverlap: FALSE\n");
+    }
 
-  // display device properties
-  fprintf(fp,"Device Name = %s\n",deviceProp.name);
-  fprintf(fp,"multiProcessorCount: %d\n",deviceProp.multiProcessorCount);
-  fprintf(fp,"totalGlobalMem (in MB): %f\n",(unsigned long) deviceProp.totalGlobalMem / (1024.f * 1024.f));
-  fprintf(fp,"totalGlobalMem (in GB): %f\n",(unsigned long) deviceProp.totalGlobalMem / (1024.f * 1024.f * 1024.f));
-  fprintf(fp,"sharedMemPerBlock (in bytes): %lu\n",(unsigned long) deviceProp.sharedMemPerBlock);
-  fprintf(fp,"Maximum number of threads per block: %d\n",deviceProp.maxThreadsPerBlock);
-  fprintf(fp,"Maximum size of each dimension of a block: %d x %d x %d\n",
-          deviceProp.maxThreadsDim[0],deviceProp.maxThreadsDim[1],deviceProp.maxThreadsDim[2]);
-  fprintf(fp,"Maximum sizes of each dimension of a grid: %d x %d x %d\n",
-          deviceProp.maxGridSize[0],deviceProp.maxGridSize[1],deviceProp.maxGridSize[2]);
-  fprintf(fp,"Compute capability of the device = %d.%d\n", deviceProp.major, deviceProp.minor);
-  if(deviceProp.canMapHostMemory){
-    fprintf(fp,"canMapHostMemory: TRUE\n");
-  }else{
-    fprintf(fp,"canMapHostMemory: FALSE\n");
-  }
-  if(deviceProp.deviceOverlap){
-    fprintf(fp,"deviceOverlap: TRUE\n");
-  }else{
-    fprintf(fp,"deviceOverlap: FALSE\n");
-  }
+    // make sure that the device has compute capability >= 1.3
+    //if (deviceProp.major < 1){
+    //  fprintf(stderr,"Compute capability major number should be at least 1, exiting...\n\n");
+    //  exit_on_error("CUDA Compute capability major number should be at least 1");
+    //}
+    //if (deviceProp.major == 1 && deviceProp.minor < 3){
+    //  fprintf(stderr,"Compute capability should be at least 1.3, exiting...\n");
+    //  exit_on_error("CUDA Compute capability major number should be at least 1.3");
+    //}
 
-  // make sure that the device has compute capability >= 1.3
-  //if (deviceProp.major < 1){
-  //  fprintf(stderr,"Compute capability major number should be at least 1, exiting...\n\n");
-  //  exit_on_error("CUDA Compute capability major number should be at least 1");
-  //}
-  //if (deviceProp.major == 1 && deviceProp.minor < 3){
-  //  fprintf(stderr,"Compute capability should be at least 1.3, exiting...\n");
-  //  exit_on_error("CUDA Compute capability major number should be at least 1.3");
-  //}
+    // outputs initial memory infos via cudaMemGetInfo()
+    double free_db,used_db,total_db;
+    get_free_memory(&free_db,&used_db,&total_db);
+    fprintf(fp,"%d: GPU memory usage: used = %f MB, free = %f MB, total = %f MB\n", myrank,
+            used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
 
-  // outputs initial memory infos via cudaMemGetInfo()
-  double free_db,used_db,total_db;
-  get_free_memory(&free_db,&used_db,&total_db);
-  fprintf(fp,"%d: GPU memory usage: used = %f MB, free = %f MB, total = %f MB\n", myrank,
-          used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0);
-
-  fclose(fp);
+    fclose(fp);
+  }
 }
 
 /* ----------------------------------------------------------------------------------------------- */
@@ -1490,13 +1497,6 @@
 
   Mesh* mp = (Mesh*)(*Mesh_pointer_f);
 
-  // allocates arrays for receivers
-  print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_islice_selected_rec,
-                                     *islice_selected_rec_size*sizeof(int)),6001);
-  // copies arrays to GPU device
-  print_CUDA_error_if_any(cudaMemcpy(mp->d_islice_selected_rec,islice_selected_rec,
-                                     *islice_selected_rec_size*sizeof(int),cudaMemcpyHostToDevice),6002);
-
   // adjoint source arrays
   mp->nadj_rec_local = *nadj_rec_local;
   if( mp->nadj_rec_local > 0 ){
@@ -1900,7 +1900,6 @@
 
   // purely adjoint & kernel array
   if( *SIMULATION_TYPE == 2 || *SIMULATION_TYPE == 3 ){
-    cudaFree(mp->d_islice_selected_rec);
     if(mp->nadj_rec_local > 0 ){
       cudaFree(mp->d_adj_sourcearrays);
       cudaFree(mp->d_pre_computed_irec);

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c	2012-02-25 01:37:33 UTC (rev 19680)
@@ -1,30 +1,30 @@
-/*
- !=====================================================================
- !
- !               S p e c f e m 3 D  V e r s i o n  2 . 0
- !               ---------------------------------------
- !
- !          Main authors: Dimitri Komatitsch and Jeroen Tromp
- !    Princeton University, USA and University of Pau / CNRS / INRIA
- ! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
- !                            April 2011
- !
- ! This program is free software; you can redistribute it and/or modify
- ! it under the terms of the GNU General Public License as published by
- ! the Free Software Foundation; either version 2 of the License, or
- ! (at your option) any later version.
- !
- ! This program is distributed in the hope that it will be useful,
- ! but WITHOUT ANY WARRANTY; without even the implied warranty of
- ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- ! GNU General Public License for more details.
- !
- ! You should have received a copy of the GNU General Public License along
- ! with this program; if not, write to the Free Software Foundation, Inc.,
- ! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- !
- !=====================================================================
- */
+/* 
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+*/
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -33,101 +33,72 @@
 
 typedef float realw;
 
-/* from check_fields_cuda.cu */
+ 
+
+//
+// src/cuda/check_fields_cuda.cu
+//
+
 void FC_FUNC_(check_max_norm_displ_gpu,
-              CHECK_MAX_NORM_DISPL_GPU)(int* size, realw* displ,long* Mesh_pointer_f,int* announceID){}
+              CHECK_MAX_NORM_DISPL_GPU)(int* size, realw* displ,long* Mesh_pointer_f,int* announceID) {} 
 
 void FC_FUNC_(check_max_norm_vector,
-              CHECK_MAX_NORM_VECTOR)(int* size, realw* vector1, int* announceID){}
+              CHECK_MAX_NORM_VECTOR)(int* size, realw* vector1, int* announceID) {} 
+
 void FC_FUNC_(check_max_norm_displ,
-              CHECK_MAX_NORM_DISPL)(int* size, realw* displ, int* announceID){}
+              CHECK_MAX_NORM_DISPL)(int* size, realw* displ, int* announceID) {} 
 
 void FC_FUNC_(check_max_norm_b_displ_gpu,
-              CHECK_MAX_NORM_B_DISPL_GPU)(int* size, realw* b_displ,long* Mesh_pointer_f,int* announceID){}
+              CHECK_MAX_NORM_B_DISPL_GPU)(int* size, realw* b_displ,long* Mesh_pointer_f,int* announceID) {} 
 
 void FC_FUNC_(check_max_norm_b_accel_gpu,
-              CHECK_MAX_NORM_B_ACCEL_GPU)(int* size, realw* b_accel,long* Mesh_pointer_f,int* announceID){}
+              CHECK_MAX_NORM_B_ACCEL_GPU)(int* size, realw* b_accel,long* Mesh_pointer_f,int* announceID) {} 
 
 void FC_FUNC_(check_max_norm_b_veloc_gpu,
-              CHECK_MAX_NORM_B_VELOC_GPU)(int* size, realw* b_veloc,long* Mesh_pointer_f,int* announceID){}
+              CHECK_MAX_NORM_B_VELOC_GPU)(int* size, realw* b_veloc,long* Mesh_pointer_f,int* announceID) {} 
 
 void FC_FUNC_(check_max_norm_b_displ,
-              CHECK_MAX_NORM_B_DISPL)(int* size, realw* b_displ,int* announceID){}
+              CHECK_MAX_NORM_B_DISPL)(int* size, realw* b_displ,int* announceID) {} 
 
 void FC_FUNC_(check_max_norm_b_accel,
-              CHECK_MAX_NORM_B_ACCEL)(int* size, realw* b_accel,int* announceID){}
+              CHECK_MAX_NORM_B_ACCEL)(int* size, realw* b_accel,int* announceID) {} 
 
 void FC_FUNC_(check_error_vectors,
-              CHECK_ERROR_VECTORS)(int* sizef, realw* vector1,realw* vector2){}
+              CHECK_ERROR_VECTORS)(int* sizef, realw* vector1,realw* vector2) {} 
 
 void FC_FUNC_(get_max_accel,
-              GET_MAX_ACCEL)(int* itf,int* sizef,long* Mesh_pointer){}
+              GET_MAX_ACCEL)(int* itf,int* sizef,long* Mesh_pointer) {} 
 
 void FC_FUNC_(get_norm_acoustic_from_device,
               GET_NORM_ACOUSTIC_FROM_DEVICE)(realw* norm,
                                                   long* Mesh_pointer_f,
-                                                  int* SIMULATION_TYPE){}
+                                                  int* SIMULATION_TYPE) {} 
 
 void FC_FUNC_(get_norm_elastic_from_device,
               GET_NORM_ELASTIC_FROM_DEVICE)(realw* norm,
                                                  long* Mesh_pointer_f,
-                                                 int* SIMULATION_TYPE){}
+                                                 int* SIMULATION_TYPE) {} 
 
 
-/* from file compute_add_sources_elastic_cuda.cu */
+//
+// src/cuda/compute_add_sources_acoustic_cuda.cu
+//
 
-void FC_FUNC_(compute_add_sources_el_cuda,
-              COMPUTE_ADD_SOURCES_EL_CUDA)(long* Mesh_pointer_f,
-                                           int* phase_is_innerf,
-                                           int* NSOURCESf,
-                                           double* h_stf_pre_compute,
-                                           int* myrankf){}
-
-void FC_FUNC_(compute_add_sources_el_s3_cuda,
-              COMPUTE_ADD_SOURCES_EL_S3_CUDA)(long* Mesh_pointer,
-                                              double* h_stf_pre_compute,
-                                              int* NSOURCES,
-                                              int* phase_is_inner,
-                                              int* myrank){}
-
-void FC_FUNC_(add_source_master_rec_noise_cu,
-              ADD_SOURCE_MASTER_REC_NOISE_CU)(long* Mesh_pointer_f,
-                                                int* myrank_f,
-                                                int* it_f,
-                                                int* irec_master_noise_f,
-                                                int* islice_selected_rec){}
-
-void FC_FUNC_(add_sources_el_sim_type_2_or_3,
-              ADD_SOURCES_EL_SIM_TYPE_2_OR_3)(long* Mesh_pointer,
-                                           realw* h_adj_sourcearrays,
-                                           int* phase_is_inner,
-                                           int* h_ispec_is_inner,
-                                           int* h_ispec_is_elastic,
-                                           int* h_ispec_selected_rec,
-                                           int* myrank,
-                                           int* nrec,
-                                           int* time_index,
-                                           int* h_islice_selected_rec,
-                                           int* nadj_rec_local,
-                                           int* NTSTEP_BETWEEN_READ_ADJSRC){}
-
-/* from file compute_add_sources_acoustic_cuda.cu */
-
 void FC_FUNC_(compute_add_sources_ac_cuda,
               COMPUTE_ADD_SOURCES_AC_CUDA)(long* Mesh_pointer_f,
                                                  int* phase_is_innerf,
                                                  int* NSOURCESf,
                                                  int* SIMULATION_TYPEf,
                                                  double* h_stf_pre_compute,
-                                                 int* myrankf){}
+                                                 int* myrankf) {} 
 
 void FC_FUNC_(compute_add_sources_ac_s3_cuda,
-              COMPUTE_ADD_SOURCES_AC_S3_CUDA)(long* Mesh_pointer_f,
+              COMPUTE_ADD_SOURCES_AC_s3_CUDA)(long* Mesh_pointer_f,
                                                       int* phase_is_innerf,
                                                       int* NSOURCESf,
                                                       int* SIMULATION_TYPEf,
                                                       double* h_stf_pre_compute,
-                                                      int* myrankf){}
+                                                      int* myrankf) {} 
 
 void FC_FUNC_(add_sources_ac_sim_2_or_3_cuda,
               ADD_SOURCES_AC_SIM_2_OR_3_CUDA)(long* Mesh_pointer,
@@ -141,26 +112,70 @@
                                                int* time_index,
                                                int* h_islice_selected_rec,
                                                int* nadj_rec_local,
-                                               int* NTSTEP_BETWEEN_ADJSRC){}
+                                               int* NTSTEP_BETWEEN_READ_ADJSRC) {} 
 
-/* from compute_coupling_cuda.cu */
 
-void FC_FUNC_(compute_coupling_ac_el_cuda,
-              COMPUTE_COUPLING_AC_EL_CUDA)(
-                                            long* Mesh_pointer_f,
+//
+// src/cuda/compute_add_sources_elastic_cuda.cu
+//
+
+void FC_FUNC_(compute_add_sources_el_cuda,
+              COMPUTE_ADD_SOURCES_EL_CUDA)(long* Mesh_pointer_f,
                                             int* phase_is_innerf,
-                                            int* num_coupling_ac_el_facesf,
-                                            int* SIMULATION_TYPEf){}
+                                            int* NSOURCESf,
+                                            double* h_stf_pre_compute,
+                                            int* myrankf) {} 
 
+void FC_FUNC_(compute_add_sources_el_s3_cuda,
+              COMPUTE_ADD_SOURCES_EL_S3_CUDA)(long* Mesh_pointer,
+                                              double* h_stf_pre_compute,
+                                              int* NSOURCESf,
+                                              int* phase_is_inner,
+                                              int* myrank) {} 
+
+void FC_FUNC_(add_source_master_rec_noise_cu,
+              ADD_SOURCE_MASTER_REC_NOISE_CU)(long* Mesh_pointer_f,
+                                                int* myrank_f,
+                                                int* it_f,
+                                                int* irec_master_noise_f,
+                                                int* islice_selected_rec) {} 
+
+void FC_FUNC_(add_sources_el_sim_type_2_or_3,
+              ADD_SOURCES_EL_SIM_TYPE_2_OR_3)(long* Mesh_pointer,
+                                               realw* h_adj_sourcearrays,
+                                               int* phase_is_inner,
+                                               int* h_ispec_is_inner,
+                                               int* h_ispec_is_elastic,
+                                               int* h_ispec_selected_rec,
+                                               int* myrank,
+                                               int* nrec,
+                                               int* time_index,
+                                               int* h_islice_selected_rec,
+                                               int* nadj_rec_local,
+                                               int* NTSTEP_BETWEEN_READ_ADJSRC) {} 
+
+
+//
+// src/cuda/compute_coupling_cuda.cu
+//
+
+void FC_FUNC_(compute_coupling_ac_el_cuda,
+              COMPUTE_COUPLING_AC_EL_CUDA)(long* Mesh_pointer_f,
+                                           int* phase_is_innerf,
+                                           int* num_coupling_ac_el_facesf,
+                                           int* SIMULATION_TYPEf) {} 
+
 void FC_FUNC_(compute_coupling_el_ac_cuda,
-              COMPUTE_COUPLING_EL_AC_CUDA)(
-                                                 long* Mesh_pointer_f,
-                                                 int* phase_is_innerf,
-                                                 int* num_coupling_ac_el_facesf,
-                                                 int* SIMULATION_TYPEf){}
+              COMPUTE_COUPLING_EL_AC_CUDA)(long* Mesh_pointer_f,
+                                           int* phase_is_innerf,
+                                           int* num_coupling_ac_el_facesf,
+                                           int* SIMULATION_TYPEf) {} 
 
-/* from compute_forces_acoustic_cuda.cu */
 
+//
+// src/cuda/compute_forces_acoustic_cuda.cu
+//
+
 void FC_FUNC_(transfer_boun_pot_from_device,
               TRANSFER_BOUN_POT_FROM_DEVICE)(
                                               int* size,
@@ -171,7 +186,7 @@
                                               int* max_nibool_interfaces_ext_mesh,
                                               int* nibool_interfaces_ext_mesh,
                                               int* ibool_interfaces_ext_mesh,
-                                              int* FORWARD_OR_ADJOINT){}
+                                              int* FORWARD_OR_ADJOINT){} 
 
 void FC_FUNC_(transfer_asmbl_pot_to_device,
               TRANSFER_ASMBL_POT_TO_DEVICE)(
@@ -182,52 +197,54 @@
                                                 int* max_nibool_interfaces_ext_mesh,
                                                 int* nibool_interfaces_ext_mesh,
                                                 int* ibool_interfaces_ext_mesh,
-                                                int* FORWARD_OR_ADJOINT){}
+                                                int* FORWARD_OR_ADJOINT) {} 
 
 void FC_FUNC_(compute_forces_acoustic_cuda,
               COMPUTE_FORCES_ACOUSTIC_CUDA)(long* Mesh_pointer_f,
                                             int* iphase,
                                             int* nspec_outer_acoustic,
                                             int* nspec_inner_acoustic,
-                                            int* SIMULATION_TYPE){}
+                                            int* SIMULATION_TYPE) {} 
 
 void FC_FUNC_(kernel_3_a_acoustic_cuda,KERNEL_3_ACOUSTIC_CUDA)(
                              long* Mesh_pointer,
                              int* size_F,
-                             int* SIMULATION_TYPE){}
+                             int* SIMULATION_TYPE) {} 
 
 void FC_FUNC_(kernel_3_b_acoustic_cuda,KERNEL_3_ACOUSTIC_CUDA)(
                                                              long* Mesh_pointer,
                                                              int* size_F,
                                                              realw* deltatover2_F,
                                                              int* SIMULATION_TYPE,
-                                                             realw* b_deltatover2_F){}
+                                                             realw* b_deltatover2_F) {} 
 
 void FC_FUNC_(acoustic_enforce_free_surf_cuda,
               ACOUSTIC_ENFORCE_FREE_SURF_CUDA)(long* Mesh_pointer_f,
                                                   int* SIMULATION_TYPE,
-                                                  int* ABSORB_FREE_SURFACE){}
+                                                  int* ABSORB_FREE_SURFACE) {} 
 
 
-/* from compute_forces_elastic_cuda.cu */
+//
+// src/cuda/compute_forces_elastic_cuda.cu
+//
+
 void FC_FUNC_(transfer_boun_accel_from_device,
               TRANSFER_BOUN_ACCEL_FROM_DEVICE)(int* size, long* Mesh_pointer_f, realw* accel,
-               realw* send_accel_buffer,
-               int* num_interfaces_ext_mesh,
-               int* max_nibool_interfaces_ext_mesh,
-               int* nibool_interfaces_ext_mesh,
-               int* ibool_interfaces_ext_mesh,
-               int* FORWARD_OR_ADJOINT){}
+                                                    realw* send_accel_buffer,
+                                                    int* num_interfaces_ext_mesh,
+                                                    int* max_nibool_interfaces_ext_mesh,
+                                                    int* nibool_interfaces_ext_mesh,
+                                                    int* ibool_interfaces_ext_mesh,
+                                                    int* FORWARD_OR_ADJOINT){} 
 
-
 void FC_FUNC_(transfer_asmbl_accel_to_device,
-              TRANSFER_ASMBL_ACCEL_TO_DEVICE)(long* Mesh_pointer,
-                                              realw* accel,
-                                              realw* buffer_recv_vector_ext_mesh,
-                                              int* num_interfaces_ext_mesh,
-                                              int* max_nibool_interfaces_ext_mesh,
-                                              int* nibool_interfaces_ext_mesh,
-                 int* ibool_interfaces_ext_mesh,int* FORWARD_OR_ADJOINT){}
+              TRANSFER_ASMBL_ACCEL_TO_DEVICE)(long* Mesh_pointer, realw* accel,
+                                                    realw* buffer_recv_vector_ext_mesh,
+                                                    int* num_interfaces_ext_mesh,
+                                                    int* max_nibool_interfaces_ext_mesh,
+                                                    int* nibool_interfaces_ext_mesh,
+                                                    int* ibool_interfaces_ext_mesh,
+                                                    int* FORWARD_OR_ADJOINT) {} 
 
 void FC_FUNC_(compute_forces_elastic_cuda,
               COMPUTE_FORCES_ELASTIC_CUDA)(long* Mesh_pointer_f,
@@ -237,7 +254,7 @@
                                            int* SIMULATION_TYPE,
                                            int* COMPUTE_AND_STORE_STRAIN,
                                            int* ATTENUATION,
-                                           int* ANISOTROPY){}
+                                           int* ANISOTROPY) {} 
 
 void FC_FUNC_(kernel_3_a_cuda,
               KERNEL_3_A_CUDA)(long* Mesh_pointer,
@@ -245,62 +262,76 @@
                                realw* deltatover2_F,
                                int* SIMULATION_TYPE_f,
                                realw* b_deltatover2_F,
-                               int* OCEANS){}
+                               int* OCEANS) {} 
 
 void FC_FUNC_(kernel_3_b_cuda,
               KERNEL_3_B_CUDA)(long* Mesh_pointer,
                              int* size_F,
                              realw* deltatover2_F,
                              int* SIMULATION_TYPE_f,
-             realw* b_deltatover2_F){}
+                             realw* b_deltatover2_F) {} 
 
 void FC_FUNC_(elastic_ocean_load_cuda,
               ELASTIC_OCEAN_LOAD_CUDA)(long* Mesh_pointer_f,
-                                       int* SIMULATION_TYPE){}
+                                       int* SIMULATION_TYPE) {} 
 
-/* from file compute_kernels_cuda.cu */
 
+//
+// src/cuda/compute_kernels_cuda.cu
+//
+
 void FC_FUNC_(compute_kernels_elastic_cuda,
               COMPUTE_KERNELS_ELASTIC_CUDA)(long* Mesh_pointer,
-                                            realw* deltat_f){}
+                                            realw* deltat_f) {} 
 
 void FC_FUNC_(compute_kernels_strgth_noise_cu,
               COMPUTE_KERNELS_STRGTH_NOISE_CU)(long* Mesh_pointer,
-                                               realw* h_noise_surface_movie,
-                                               realw* deltat){}
+                                                    realw* h_noise_surface_movie,
+                                                    realw* deltat) {} 
 
 void FC_FUNC_(compute_kernels_acoustic_cuda,
               COMPUTE_KERNELS_ACOUSTIC_CUDA)(
                                              long* Mesh_pointer,
-                                             realw* deltat_f){}
+                                             realw* deltat_f) {} 
 
 void FC_FUNC_(compute_kernels_hess_cuda,
               COMPUTE_KERNELS_HESS_CUDA)(long* Mesh_pointer,
-                                         realw* deltat_f) {}
+                                         realw* deltat_f,
+                                         int* ELASTIC_SIMULATION,
+                                         int* ACOUSTIC_SIMULATION) {} 
 
-/* from file compute_stacey_acoustic_cuda.cu */
+
+//
+// src/cuda/compute_stacey_acoustic_cuda.cu
+//
+
 void FC_FUNC_(compute_stacey_acoustic_cuda,
               COMPUTE_STACEY_ACOUSTIC_CUDA)(
                                     long* Mesh_pointer_f,
                                     int* phase_is_innerf,
                                     int* SIMULATION_TYPEf,
                                     int* SAVE_FORWARDf,
-                                    realw* h_b_absorb_potential){}
+                                    realw* h_b_absorb_potential) {} 
 
 
-/* from file compute_stacey_elastic_cuda.cu */
+//
+// src/cuda/compute_stacey_elastic_cuda.cu
+//
 
 void FC_FUNC_(compute_stacey_elastic_cuda,
               COMPUTE_STACEY_ELASTIC_CUDA)(long* Mesh_pointer_f,
                                            int* phase_is_innerf,
                                            int* SIMULATION_TYPEf,
                                            int* SAVE_FORWARDf,
-                                           realw* h_b_absorb_field){}
+                                           realw* h_b_absorb_field) {} 
 
-/* from file it_update_displacement_cuda.cu */
 
+//
+// src/cuda/it_update_displacement_cuda.cu
+//
+
 void FC_FUNC_(it_update_displacement_cuda,
-              it_update_displacement_cuda)(long* Mesh_pointer_f,
+              IT_UPDATE_DISPLACMENT_CUDA)(long* Mesh_pointer_f,
                                                  int* size_F,
                                                  realw* deltat_F,
                                                  realw* deltatsqover2_F,
@@ -308,55 +339,64 @@
                                                  int* SIMULATION_TYPE,
                                                  realw* b_deltat_F,
                                                  realw* b_deltatsqover2_F,
-                                                 realw* b_deltatover2_F){}
+                                                 realw* b_deltatover2_F) {} 
 
 void FC_FUNC_(it_update_displacement_ac_cuda,
-              IT_UPDATE_DISPLACEMENT_AC_CUDA)(long* Mesh_pointer_f,
-                                                           int* size_F,
-                                                           realw* deltat_F,
-                                                           realw* deltatsqover2_F,
-                                                           realw* deltatover2_F,
-                                                           int* SIMULATION_TYPE,
-                                                           realw* b_deltat_F,
-                                                           realw* b_deltatsqover2_F,
-                                                           realw* b_deltatover2_F){}
+              it_update_displacement_ac_cuda)(long* Mesh_pointer_f,
+                                               int* size_F,
+                                               realw* deltat_F,
+                                               realw* deltatsqover2_F,
+                                               realw* deltatover2_F,
+                                               int* SIMULATION_TYPE,
+                                               realw* b_deltat_F,
+                                               realw* b_deltatsqover2_F,
+                                               realw* b_deltatover2_F) {} 
 
-/* from file noise_tomography_cuda.cu */
 
-void FC_FUNC_(fortranflush,FORTRANFLUSH)(int* rank){}
-void FC_FUNC_(fortranprint,FORTRANPRINT)(int* id){}
+//
+// src/cuda/noise_tomography_cuda.cu
+//
 
-void FC_FUNC_(fortranprintf,FORTRANPRINTF)(realw* val){}
+void FC_FUNC_(fortranflush,FORTRANFLUSH)(int* rank){} 
 
-void FC_FUNC_(fortranprintd,FORTRANPRINTD)(double* val){}
+void FC_FUNC_(fortranprint,FORTRANPRINT)(int* id) {} 
 
-void FC_FUNC_(make_displ_rand,MAKE_DISPL_RAND)(long* Mesh_pointer_f,realw* h_displ){}
+void FC_FUNC_(fortranprintf,FORTRANPRINTF)(realw* val) {} 
 
+void FC_FUNC_(fortranprintd,FORTRANPRINTD)(double* val) {} 
+
+void FC_FUNC_(make_displ_rand,MAKE_DISPL_RAND)(long* Mesh_pointer_f,realw* h_displ) {} 
+
 void FC_FUNC_(transfer_surface_to_host,
               TRANSFER_SURFACE_TO_HOST)(long* Mesh_pointer_f,
-                                        realw* h_noise_surface_movie){}
+                                        realw* h_noise_surface_movie) {} 
 
 void FC_FUNC_(noise_read_add_surface_movie_cu,
               NOISE_READ_ADD_SURFACE_MOVIE_CU)(long* Mesh_pointer_f,
                                                realw* h_noise_surface_movie,
-                                               int* NOISE_TOMOGRAPHYf){}
+                                               int* NOISE_TOMOGRAPHYf) {} 
 
 
-/* from file prepare_mesh_constants_cuda.cu            */
+//
+// src/cuda/prepare_mesh_constants_cuda.cu
+//
 
-void FC_FUNC_(pause_for_debug,PAUSE_FOR_DEBUG)(){}
+void FC_FUNC_(pause_for_debug,PAUSE_FOR_DEBUG)() {} 
+
 void FC_FUNC_(output_free_device_memory,
-              OUTPUT_FREE_DEVICE_MEMORY)(int* myrank){}
+              OUTPUT_FREE_DEVICE_MEMORY)(int* myrank) {} 
 
 void FC_FUNC_(show_free_device_memory,
-              SHOW_FREE_DEVICE_MEMORY)(){}
+              SHOW_FREE_DEVICE_MEMORY)() {} 
 
 void FC_FUNC_(get_free_device_memory,
-              get_FREE_DEVICE_MEMORY)(realw* free, realw* used, realw* total ){}
+              get_FREE_DEVICE_MEMORY)(realw* free, realw* used, realw* total ) {} 
 
-
 void FC_FUNC_(prepare_cuda_device,
-              PREPARE_CUDA_DEVICE)(int* myrank_f,int* ncuda_devices){}
+              PREPARE_CUDA_DEVICE)(int* myrank_f,int* ncuda_devices) { 
+ fprintf(stderr,"ERROR: GPU_MODE enabled without GPU/CUDA Support. To enable GPU support, reconfigure with --with-cuda flag.\n");
+ exit(1);
+} 
 
 void FC_FUNC_(prepare_constants_device,
               PREPARE_CONSTANTS_DEVICE)(long* Mesh_pointer,
@@ -367,8 +407,10 @@
                                         realw* h_gammax, realw* h_gammay, realw* h_gammaz,
                                         realw* h_kappav, realw* h_muv,
                                         int* h_ibool,
-                                        int* num_interfaces_ext_mesh, int* max_nibool_interfaces_ext_mesh,
-                                        int* h_nibool_interfaces_ext_mesh, int* h_ibool_interfaces_ext_mesh,
+                                        int* num_interfaces_ext_mesh,
+                                        int* max_nibool_interfaces_ext_mesh,
+                                        int* h_nibool_interfaces_ext_mesh,
+                                        int* h_ibool_interfaces_ext_mesh,
                                         realw* h_hprime_xx,realw* h_hprime_yy,realw* h_hprime_zz,
                                         realw* h_hprimewgll_xx,realw* h_hprimewgll_yy,realw* h_hprimewgll_zz,
                                         realw* h_wgllwgll_xy,realw* h_wgllwgll_xz,realw* h_wgllwgll_yz,
@@ -379,7 +421,7 @@
                                         int* h_num_abs_boundary_faces,
                                         int* h_ispec_is_inner,
                                         int* NSOURCES,
-                                        int* nsources_local,
+                                        int* nsources_local_f,
                                         realw* h_sourcearrays,
                                         int* h_islice_selected_source,
                                         int* h_ispec_selected_source,
@@ -388,22 +430,9 @@
                                         int* nrec_f,
                                         int* nrec_local_f,
                                         int* SIMULATION_TYPE,
-                                        int* USE_MESH_COLORING_GPU,
-                                        int* nspec_acoustic,int* nspec_elastic)
-{
-  fprintf(stderr,"ERROR: GPU_MODE enabled without GPU/CUDA Support. To enable GPU support, reconfigure with --with-cuda flag.\n");
-  exit(1);
-}
+                                        int* USE_MESH_COLORING_GPU_f,
+                                        int* nspec_acoustic,int* nspec_elastic) {} 
 
-void FC_FUNC_(prepare_sim2_or_3_const_device,
-              PREPARE_SIM2_OR_3_CONST_DEVICE)(
-                                              long* Mesh_pointer_f,
-                                              int* islice_selected_rec,
-                                              int* islice_selected_rec_size,
-                                              int* nadj_rec_local,
-                                              int* nrec,
-                                              int* myrank){}
-
 void FC_FUNC_(prepare_fields_acoustic_device,
               PREPARE_FIELDS_ACOUSTIC_DEVICE)(long* Mesh_pointer_f,
                                               realw* rmass_acoustic,
@@ -427,12 +456,13 @@
                                               realw* coupling_ac_el_jacobian2Dw,
                                               int* num_colors_outer_acoustic,
                                               int* num_colors_inner_acoustic,
-                                              int* num_elem_colors_acoustic){}
+                                              int* num_elem_colors_acoustic) {} 
 
 void FC_FUNC_(prepare_fields_acoustic_adj_dev,
               PREPARE_FIELDS_ACOUSTIC_ADJ_DEV)(long* Mesh_pointer_f,
-                                               int* SIMULATION_TYPE,
-                                               int* APPROXIMATE_HESS_KL) {}
+                                              int* SIMULATION_TYPE,
+                                              int* APPROXIMATE_HESS_KL) {} 
+
 void FC_FUNC_(prepare_fields_elastic_device,
               PREPARE_FIELDS_ELASTIC_DEVICE)(long* Mesh_pointer_f,
                                              int* size,
@@ -465,7 +495,7 @@
                                              int* num_colors_outer_elastic,
                                              int* num_colors_inner_elastic,
                                              int* num_elem_colors_elastic,
-                                             int* ANISOTROY,
+                                             int* ANISOTROPY,
                                              realw *c11store,
                                              realw *c12store,
                                              realw *c13store,
@@ -486,23 +516,31 @@
                                              realw *c46store,
                                              realw *c55store,
                                              realw *c56store,
-                                             realw *c66store){}
+                                             realw *c66store){} 
 
 void FC_FUNC_(prepare_fields_elastic_adj_dev,
               PREPARE_FIELDS_ELASTIC_ADJ_DEV)(long* Mesh_pointer_f,
-                                              int* size,
-                                              int* SIMULATION_TYPE,
-                                              int* COMPUTE_AND_STORE_STRAIN,
-                                              realw* epsilon_trace_over_3,
-                                              realw* b_epsilondev_xx,realw* b_epsilondev_yy,realw* b_epsilondev_xy,
-                                              realw* b_epsilondev_xz,realw* b_epsilondev_yz,
-                                              realw* b_epsilon_trace_over_3,
-                                              int* ATTENUATION,
-                                              int* R_size,
-                                              realw* b_R_xx,realw* b_R_yy,realw* b_R_xy,realw* b_R_xz,realw* b_R_yz,
-                                              realw* b_alphaval,realw* b_betaval,realw* b_gammaval,
-                                              int* APPROXIMATE_HESS_KL){}
+                                             int* size,
+                                             int* SIMULATION_TYPE,
+                                             int* COMPUTE_AND_STORE_STRAIN,
+                                             realw* epsilon_trace_over_3,
+                                             realw* b_epsilondev_xx,realw* b_epsilondev_yy,realw* b_epsilondev_xy,
+                                             realw* b_epsilondev_xz,realw* b_epsilondev_yz,
+                                             realw* b_epsilon_trace_over_3,
+                                             int* ATTENUATION,
+                                             int* R_size,
+                                             realw* b_R_xx,realw* b_R_yy,realw* b_R_xy,realw* b_R_xz,realw* b_R_yz,
+                                             realw* b_alphaval,realw* b_betaval,realw* b_gammaval,
+                                             int* APPROXIMATE_HESS_KL){} 
 
+void FC_FUNC_(prepare_sim2_or_3_const_device,
+              PREPARE_SIM2_OR_3_CONST_DEVICE)(
+                                              long* Mesh_pointer_f,
+                                              int* islice_selected_rec,
+                                              int* islice_selected_rec_size,
+                                              int* nadj_rec_local,
+                                              int* nrec,
+                                              int* myrank) {} 
 
 void FC_FUNC_(prepare_fields_noise_device,
               PREPARE_FIELDS_NOISE_DEVICE)(long* Mesh_pointer_f,
@@ -518,8 +556,17 @@
                                            realw* normal_y_noise,
                                            realw* normal_z_noise,
                                            realw* mask_noise,
-                                           realw* free_surface_jacobian2Dw){}
+                                           realw* free_surface_jacobian2Dw) {} 
 
+void FC_FUNC_(prepare_fields_gravity_device,
+              PREPARE_FIELDS_gravity_DEVICE)(long* Mesh_pointer_f,
+                                             int* GRAVITY,
+                                             realw* minus_deriv_gravity,
+                                             realw* minus_g,
+                                             realw* h_wgll_cube,
+                                             int* ACOUSTIC_SIMULATION,
+                                             realw* rhostore) {} 
+
 void FC_FUNC_(prepare_cleanup_device,
               PREPARE_CLEANUP_DEVICE)(long* Mesh_pointer_f,
                                       int* SIMULATION_TYPE,
@@ -532,57 +579,50 @@
                                       int* ATTENUATION,
                                       int* ANISOTROPY,
                                       int* OCEANS,
-                                      int* APPROXIMATE_HESS_KL){}
+                                      int* APPROXIMATE_HESS_KL) {} 
 
-void FC_FUNC_(prepare_fields_gravity_device,
-              PREPARE_FIELDS_gravity_DEVICE)(long* Mesh_pointer_f,
-                                             int* GRAVITY,
-                                             realw* minus_deriv_gravity,
-                                             realw* minus_g,
-                                             realw* h_wgll_cube,
-                                             int* ACOUSTIC_SIMULATION,
-                                             realw* rhostore){}
-/* from file transfer_fields_cuda.cu              */
 
+//
+// src/cuda/transfer_fields_cuda.cu
+//
+
 void FC_FUNC_(transfer_fields_el_to_device,
-              TRANSFER_FIELDS_EL_TO_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f){}
+              TRANSFER_FIELDS_EL_TO_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {} 
 
 void FC_FUNC_(transfer_fields_el_from_device,
-              TRANSFER_FIELDS_EL_FROM_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f){}
+              TRANSFER_FIELDS_EL_FROM_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {} 
 
 void FC_FUNC_(transfer_b_fields_to_device,
               TRANSFER_B_FIELDS_TO_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,
-                                           long* Mesh_pointer_f){}
+                                           long* Mesh_pointer_f) {} 
 
 void FC_FUNC_(transfer_b_fields_from_device,
-              TRANSFER_B_FIELDS_FROM_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,long* Mesh_pointer_f){}
+              TRANSFER_B_FIELDS_FROM_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,long* Mesh_pointer_f) {} 
 
 void FC_FUNC_(transfer_accel_to_device,
-              TRNASFER_ACCEL_TO_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f){}
+              TRNASFER_ACCEL_TO_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {} 
 
 void FC_FUNC_(transfer_accel_from_device,
-              TRANSFER_ACCEL_FROM_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f){}
+              TRANSFER_ACCEL_FROM_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {} 
+
 void FC_FUNC_(transfer_b_accel_from_device,
-              TRNASFER_B_ACCEL_FROM_DEVICE)(int* size, realw* b_accel,long* Mesh_pointer_f){}
+              TRNASFER_B_ACCEL_FROM_DEVICE)(int* size, realw* b_accel,long* Mesh_pointer_f) {} 
 
 void FC_FUNC_(transfer_sigma_from_device,
-              TRANSFER_SIGMA_FROM_DEVICE)(int* size, realw* sigma_kl,long* Mesh_pointer_f){}
+              TRANSFER_SIGMA_FROM_DEVICE)(int* size, realw* sigma_kl,long* Mesh_pointer_f) {} 
 
 void FC_FUNC_(transfer_b_displ_from_device,
-              TRANSFER_B_DISPL_FROM_DEVICE)(int* size, realw* displ,long* Mesh_pointer_f){}
+              TRANSFER_B_DISPL_FROM_DEVICE)(int* size, realw* displ,long* Mesh_pointer_f) {} 
 
 void FC_FUNC_(transfer_displ_from_device,
-              TRANSFER_DISPL_FROM_DEVICE)(int* size, realw* displ,long* Mesh_pointer_f){}
+              TRANSFER_DISPL_FROM_DEVICE)(int* size, realw* displ,long* Mesh_pointer_f) {} 
 
-/*
 void FC_FUNC_(transfer_compute_kernel_answers_from_device,
               TRANSFER_COMPUTE_KERNEL_ANSWERS_FROM_DEVICE)(long* Mesh_pointer,
                                                            realw* rho_kl,int* size_rho,
                                                            realw* mu_kl, int* size_mu,
-                                                           realw* kappa_kl, int* size_kappa){}
-*/
+                                                           realw* kappa_kl, int* size_kappa) {} 
 
-/*
 void FC_FUNC_(transfer_compute_kernel_fields_from_device,
               TRANSFER_COMPUTE_KERNEL_FIELDS_FROM_DEVICE)(long* Mesh_pointer,
                                                           realw* accel, int* size_accel,
@@ -604,8 +644,7 @@
                                                           realw* kappa_kl, int* size_kappa,
                                                           realw* epsilon_trace_over_3,
                                                           realw* b_epsilon_trace_over_3,
-                                                          int* size_epsilon_trace_over_3) {}
-*/
+                                                          int* size_epsilon_trace_over_3) {} 
 
 void FC_FUNC_(transfer_b_fields_att_to_device,
               TRANSFER_B_FIELDS_ATT_TO_DEVICE)(long* Mesh_pointer,
@@ -616,7 +655,7 @@
                                              realw* b_epsilondev_xy,
                                              realw* b_epsilondev_xz,
                                              realw* b_epsilondev_yz,
-                                             int* size_epsilondev){}
+                                             int* size_epsilondev) {} 
 
 void FC_FUNC_(transfer_fields_att_from_device,
               TRANSFER_FIELDS_ATT_FROM_DEVICE)(long* Mesh_pointer,
@@ -627,28 +666,27 @@
                                                realw* epsilondev_xy,
                                                realw* epsilondev_xz,
                                                realw* epsilondev_yz,
-                                               int* size_epsilondev){}
+                                               int* size_epsilondev) {} 
 
 void FC_FUNC_(transfer_kernels_el_to_host,
               TRANSFER_KERNELS_EL_TO_HOST)(long* Mesh_pointer,
                                                     realw* h_rho_kl,
                                                     realw* h_mu_kl,
                                                     realw* h_kappa_kl,
-                                                    int* NSPEC_AB){}
+                                                    int* NSPEC_AB) {} 
 
 void FC_FUNC_(transfer_kernels_noise_to_host,
               TRANSFER_KERNELS_NOISE_TO_HOST)(long* Mesh_pointer,
-                                              realw* h_Sigma_kl,
-                                              int* NSPEC_AB){}
+                                                          realw* h_Sigma_kl,
+                                                          int* NSPEC_AB) {} 
 
-
 void FC_FUNC_(transfer_fields_ac_to_device,
               TRANSFER_FIELDS_AC_TO_DEVICE)(
                                                   int* size,
                                                   realw* potential_acoustic,
                                                   realw* potential_dot_acoustic,
                                                   realw* potential_dot_dot_acoustic,
-                                                  long* Mesh_pointer_f){}
+                                                  long* Mesh_pointer_f) {} 
 
 void FC_FUNC_(transfer_b_fields_ac_to_device,
               TRANSFER_B_FIELDS_AC_TO_DEVICE)(
@@ -656,14 +694,14 @@
                                                     realw* b_potential_acoustic,
                                                     realw* b_potential_dot_acoustic,
                                                     realw* b_potential_dot_dot_acoustic,
-                                                    long* Mesh_pointer_f){}
+                                                    long* Mesh_pointer_f) {} 
 
-void FC_FUNC_(transfer_fields_ac_from_device,TRANSFER_FIELDS_AC_FROM_DEVICE)(
-                                                                             int* size,
-                                                                             realw* potential_acoustic,
-                                                                             realw* potential_dot_acoustic,
-                                                                             realw* potential_dot_dot_acoustic,
-                                                                             long* Mesh_pointer_f){}
+void FC_FUNC_(transfer_fields_ac_from_device,
+              TRANSFER_FIELDS_AC_FROM_DEVICE)(int* size,
+                                              realw* potential_acoustic,
+                                              realw* potential_dot_acoustic,
+                                              realw* potential_dot_dot_acoustic,
+                                              long* Mesh_pointer_f) {} 
 
 void FC_FUNC_(transfer_b_fields_ac_from_device,
               TRANSFER_B_FIELDS_AC_FROM_DEVICE)(
@@ -671,37 +709,41 @@
                                                       realw* b_potential_acoustic,
                                                       realw* b_potential_dot_acoustic,
                                                       realw* b_potential_dot_dot_acoustic,
-                                                      long* Mesh_pointer_f){}
+                                                      long* Mesh_pointer_f) {} 
 
 void FC_FUNC_(transfer_dot_dot_from_device,
-              TRNASFER_DOT_DOT_FROM_DEVICE)(int* size, realw* potential_dot_dot_acoustic,long* Mesh_pointer_f){}
+              TRNASFER_DOT_DOT_FROM_DEVICE)(int* size, realw* potential_dot_dot_acoustic,long* Mesh_pointer_f) {} 
 
 void FC_FUNC_(transfer_b_dot_dot_from_device,
-              TRNASFER_B_DOT_DOT_FROM_DEVICE)(int* size, realw* b_potential_dot_dot_acoustic,long* Mesh_pointer_f){}
+              TRNASFER_B_DOT_DOT_FROM_DEVICE)(int* size, realw* b_potential_dot_dot_acoustic,long* Mesh_pointer_f) {} 
 
 void FC_FUNC_(transfer_kernels_ac_to_host,
               TRANSFER_KERNELS_AC_TO_HOST)(long* Mesh_pointer,
                                                              realw* h_rho_ac_kl,
                                                              realw* h_kappa_ac_kl,
-                                                             int* NSPEC_AB){}
+                                                             int* NSPEC_AB) {} 
 
 void FC_FUNC_(transfer_kernels_hess_el_tohost,
-              TRANSFER_KERNELS_HESS_TO_HOST)(long* Mesh_pointer,
-                                             realw* h_hess_kl,
-                                             int* NSPEC_AB) {}
+              TRANSFER_KERNELS_HESS_EL_TOHOST)(long* Mesh_pointer,
+                                              realw* h_hess_kl,
+                                              int* NSPEC_AB) {} 
+
 void FC_FUNC_(transfer_kernels_hess_ac_tohost,
-              TRANSFER_KERNELS_HESS_TO_HOST)(long* Mesh_pointer,
+              TRANSFER_KERNELS_HESS_AC_TOHOST)(long* Mesh_pointer,
                                              realw* h_hess_ac_kl,
-                                             int* NSPEC_AB) {}
+                                             int* NSPEC_AB) {} 
 
-/* from file write_seismograms_cuda.cu */
 
+//
+// src/cuda/write_seismograms_cuda.cu
+//
+
 void FC_FUNC_(transfer_station_el_from_device,
               TRANSFER_STATION_EL_FROM_DEVICE)(realw* displ,realw* veloc,realw* accel,
                                                    realw* b_displ, realw* b_veloc, realw* b_accel,
                                                    long* Mesh_pointer_f,int* number_receiver_global,
                                                    int* ispec_selected_rec,int* ispec_selected_source,
-                                                   int* ibool,int* SIMULATION_TYPEf){}
+                                                   int* ibool,int* SIMULATION_TYPEf) {} 
 
 void FC_FUNC_(transfer_station_ac_from_device,
               TRANSFER_STATION_AC_FROM_DEVICE)(
@@ -716,7 +758,5 @@
                                                 int* ispec_selected_rec,
                                                 int* ispec_selected_source,
                                                 int* ibool,
-                                                int* SIMULATION_TYPEf){}
+                                                int* SIMULATION_TYPEf) {} 
 
-
-

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/transfer_fields_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/transfer_fields_cuda.cu	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/transfer_fields_cuda.cu	2012-02-25 01:37:33 UTC (rev 19680)
@@ -180,7 +180,7 @@
 
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
 
-  print_CUDA_error_if_any(cudaMemcpy(displ,mp->d_displ,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40056);
+  print_CUDA_error_if_any(cudaMemcpy(displ,mp->d_b_displ,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40056);
 
 }
 
@@ -448,12 +448,12 @@
 /* ----------------------------------------------------------------------------------------------- */
 
 extern "C"
-void FC_FUNC_(transfer_fields_ac_from_device,TRANSFER_FIELDS_AC_FROM_DEVICE)(
-                                                                                         int* size,
-                                                                                         realw* potential_acoustic,
-                                                                                         realw* potential_dot_acoustic,
-                                                                                         realw* potential_dot_dot_acoustic,
-                                                                                         long* Mesh_pointer_f) {
+void FC_FUNC_(transfer_fields_ac_from_device,
+              TRANSFER_FIELDS_AC_FROM_DEVICE)(int* size,
+                                              realw* potential_acoustic,
+                                              realw* potential_dot_acoustic,
+                                              realw* potential_dot_dot_acoustic,
+                                              long* Mesh_pointer_f) {
 TRACE("transfer_fields_ac_from_device");
 
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_perm_color.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_perm_color.f90	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_perm_color.f90	2012-02-25 01:37:33 UTC (rev 19680)
@@ -129,11 +129,11 @@
   logical :: fail_safe
   ! valence
   integer :: maxval_count_ibool_outer,maxval_count_ibool_inner
-  
+
   ! display absolute minimum possible number of colors, i.e., maximum valence (for information only)
   ! beware: this wastes memory (needs an additional array called "count_ibool")
   logical, parameter :: DISPLAY_MIN_POSSIBLE_COLORS = .false.
-  
+
   ! user output
   if( myrank == 0 ) then
     if( USE_DROUX_OPTIMIZATION ) then
@@ -150,7 +150,7 @@
   nspec_inner = 0
   nspec_domain = 0
   do ispec=1,nspec
-    ! domain elements  
+    ! domain elements
     if(ispec_is_d(ispec)) then
       ! outer/inner elements
       if(is_on_a_slice_edge(ispec)) then
@@ -189,13 +189,13 @@
     ! gets maximum values of valence for inner and outer element points
     call count_mesh_valence(ibool,is_on_a_slice_edge,ispec_is_d, &
                            myrank, nspec, nglob, &
-                           maxval_count_ibool_outer,maxval_count_ibool_inner)    
-  endif 
+                           maxval_count_ibool_outer,maxval_count_ibool_inner)
+  endif
 
   ! allocates mask
   allocate(mask_ibool(nglob),stat=ier)
   if( ier /= 0 ) stop 'error allocating mask_ibool array'
-  
+
   ! entry point for fail-safe mechanism when Droux 1993 fails
   999 continue
 
@@ -338,21 +338,21 @@
 
   nb_colors_inner_elements = icolor - nb_colors_outer_elements
 
-  ! Droux optimization: 
-  ! added this to create more balanced colors according to JJ Droux (1993)  
-  ! note: this might not find an optimial solution.  
+  ! Droux optimization:
+  ! added this to create more balanced colors according to JJ Droux (1993)
+  ! note: this might not find an optimial solution.
   !          we will probably have to try a few times with increasing colors
   if( try_Droux_coloring ) then
     ! initializes fail-safe mechanism
     fail_safe = .false.
-  
+
     ! tries to find a balanced coloring
     call balance_colors_Droux(ibool,is_on_a_slice_edge,ispec_is_d, &
                               myrank, nspec, nglob, &
                               color,nb_colors_outer_elements,nb_colors_inner_elements, &
                               nspec_outer,nspec_inner,maxval_count_ibool_inner, &
                               mask_ibool,fail_safe)
-    
+
     ! in case it fails go back to simple coloring algorithm
     if( fail_safe ) then
       try_Droux_coloring = .false.
@@ -445,7 +445,7 @@
   integer :: ispec
   integer :: iglob1,iglob2,iglob3,iglob4,iglob5,iglob6,iglob7,iglob8
   integer :: ier
-  
+
   ! allocates count array
   allocate(count_ibool(nglob),stat=ier)
   if( ier /= 0 ) stop 'error allocating count_ibool array'
@@ -521,7 +521,7 @@
   endif
 
   deallocate(count_ibool)
-  
+
   end subroutine count_mesh_valence
 
 !
@@ -554,17 +554,17 @@
   integer :: maxval_count_ibool_inner
 
   logical :: fail_safe
-  
+
   ! local parameters
   logical, dimension(:), allocatable :: icolor_conflict_found
   integer, dimension(:), allocatable :: nb_elems_in_this_color
   integer :: ispec,ispec2,icolor,ncolors,icolormin,icolormax,icolor_chosen,nb_elems_in_color_chosen
   integer :: nb_tries_of_Droux_1993,last_ispec_studied
   integer :: ier
-  
+
   ! debug outupt
   if( myrank == 0 ) then
-    write(IMAIN,*) '     balancing colors: Droux algorithm'  
+    write(IMAIN,*) '     balancing colors: Droux algorithm'
     write(IMAIN,*) '       initial number of outer element colors = ',nb_colors_outer_elements
     write(IMAIN,*) '       initial number of inner element colors = ',nb_colors_inner_elements
     write(IMAIN,*) '       initial number of total colors = ',nb_colors_outer_elements + nb_colors_inner_elements
@@ -655,7 +655,7 @@
                 mask_ibool(ibool(NGLLX,NGLLY,NGLLZ,ispec2)) .or. &
                 mask_ibool(ibool(1,NGLLY,NGLLZ,ispec2))) &
               icolor_conflict_found(color(ispec2)) = .true.
-              
+
           endif ! domain elements
         enddo
       endif
@@ -745,7 +745,7 @@
   integer :: last_ispec_studied
   integer :: target_nb_elems_per_color,icolor_target
   integer :: ier
-    
+
   ! debug outupt
   if( myrank == 0 ) then
     write(IMAIN,*) '     balancing colors: simple algorithm'
@@ -770,9 +770,9 @@
   if( nb_colors_outer_elements > 0 ) then
     target_nb_elems_per_color = nspec_outer / nb_colors_outer_elements + 1
   else
-    target_nb_elems_per_color = 1  
+    target_nb_elems_per_color = 1
   endif
-  
+
   ! print *,'nspec_outer,target_nb_elems_per_color = ',nspec_outer,target_nb_elems_per_color
 
   ! count the initial number of elements in each color
@@ -782,7 +782,7 @@
   enddo
 
   ! do not balance the last one, because it will be balanced automatically by the others
-  do icolor = icolormin,icolormax-1 
+  do icolor = icolormin,icolormax-1
 
     ! if color is already balanced, do nothing
     ! (this works because in the initial set the number of elements per color decreases when the color number increases)
@@ -865,7 +865,7 @@
         ! if cannot find any other color to move this element to
         if (all(icolor_conflict_found(icolormin:icolormax))) cycle
 
-        ! loop on all the colors to determine the color with the smallest number of elements 
+        ! loop on all the colors to determine the color with the smallest number of elements
         ! and for which there is no conflict
         nb_elems_in_color_chosen = 2147000000 ! start with extremely large unrealistic value
         do icolor_target = icolormin,icolormax
@@ -878,11 +878,11 @@
 
         ! move the element to that new color
         ! remove element from its current color
-        nb_elems_in_this_color(color(ispec)) = nb_elems_in_this_color(color(ispec)) - 1 
+        nb_elems_in_this_color(color(ispec)) = nb_elems_in_this_color(color(ispec)) - 1
         color(ispec) = icolor_chosen
         ! and add it to the new color
-        nb_elems_in_this_color(icolor_chosen) = nb_elems_in_this_color(icolor_chosen) + 1 
-        
+        nb_elems_in_this_color(icolor_chosen) = nb_elems_in_this_color(icolor_chosen) + 1
+
       endif ! domain elements
     enddo
 
@@ -908,7 +908,7 @@
   enddo
 
   ! do not balance the last one, because it will be balanced automatically by the others
-  do icolor = icolormin,icolormax-1 
+  do icolor = icolormin,icolormax-1
 
     ! if color is already balanced, do nothing
     ! (this works because in the initial set the number of elements per color decreases when the color number increases)
@@ -979,7 +979,7 @@
               mask_ibool(ibool(NGLLX,NGLLY,NGLLZ,ispec2)) .or. &
               mask_ibool(ibool(1,NGLLY,NGLLZ,ispec2))) &
                 icolor_conflict_found(color(ispec2)) = .true.
-  
+
           endif ! domain elements
         enddo
 
@@ -990,9 +990,9 @@
         enddo
 
         ! if cannot find any other color to move this element to
-        if (all(icolor_conflict_found(icolormin:icolormax))) cycle 
+        if (all(icolor_conflict_found(icolormin:icolormax))) cycle
 
-        ! loops on all the colors to determine the color with the smallest number of elements 
+        ! loops on all the colors to determine the color with the smallest number of elements
         ! and for which there is no conflict
         nb_elems_in_color_chosen = 2147000000 ! start with extremely large unrealistic value
         do icolor_target = icolormin,icolormax
@@ -1008,8 +1008,8 @@
         nb_elems_in_this_color(color(ispec)) = nb_elems_in_this_color(color(ispec)) - 1
         color(ispec) = icolor_chosen
         ! and add it to the new color
-        nb_elems_in_this_color(icolor_chosen) = nb_elems_in_this_color(icolor_chosen) + 1 
-      
+        nb_elems_in_this_color(icolor_chosen) = nb_elems_in_this_color(icolor_chosen) + 1
+
       endif ! domain elements
     enddo
 

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/check_mesh_resolution.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/check_mesh_resolution.f90	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/check_mesh_resolution.f90	2012-02-25 01:37:33 UTC (rev 19680)
@@ -61,7 +61,7 @@
   integer :: NSPEC_AB_global_min,NSPEC_AB_global_max,NSPEC_AB_global_sum
   integer :: NGLOB_AB_global_min,NGLOB_AB_global_max,NGLOB_AB_global_sum
   integer :: ispec,sizeprocs
-  
+
   !********************************************************************************
 
   ! empirical choice for distorted elements to estimate time step and period resolved:
@@ -81,7 +81,7 @@
   real(kind=CUSTOM_REAL),dimension(:),allocatable :: tmp1,tmp2
   integer:: ier
   character(len=256) :: filename,prname
-  
+
   ! initializations
   if( DT <= 0.0d0) then
     DT_PRESENT = .false.
@@ -116,7 +116,7 @@
     tmp1(:) = 0.0
     tmp2(:) = 0.0
   endif
-  
+
   ! checks courant number & minimum resolved period for each grid cell
   do ispec=1,NSPEC_AB
 
@@ -155,8 +155,8 @@
       ! debug: for vtk output
       if( SAVE_MESH_FILES ) tmp1(ispec) = cmax
     endif
-    
-    
+
+
     ! suggested timestep
     dt_suggested = COURANT_SUGGESTED * distance_min / max( vpmax,vsmax )
     dt_suggested_glob = min( dt_suggested_glob, dt_suggested)
@@ -334,7 +334,7 @@
 
   ! debug: for vtk output
   if( SAVE_MESH_FILES ) then
-    call create_name_database(prname,myrank,LOCAL_PATH) 
+    call create_name_database(prname,myrank,LOCAL_PATH)
     ! courant number
     if( DT_PRESENT ) then
       filename = trim(prname)//'res_courant_number'
@@ -348,7 +348,7 @@
                           xstore,ystore,zstore,ibool, &
                           tmp2,filename)
     deallocate(tmp1,tmp2)
-  endif  
+  endif
 
   end subroutine check_mesh_resolution
 

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in	2012-02-25 01:37:33 UTC (rev 19680)
@@ -264,7 +264,7 @@
 ! try several times with one more color before giving up
   logical, parameter :: USE_DROUX_OPTIMIZATION = .false.
   integer, parameter :: MAX_NB_TRIES_OF_DROUX_1993 = 15
-!  
+!
 ! postprocess the colors to balance them if Droux (1993) algorithm is not used
   logical, parameter :: BALANCE_COLORS_SIMPLE_ALGO = .true.
 

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/detect_surface.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/detect_surface.f90	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/detect_surface.f90	2012-02-25 01:37:33 UTC (rev 19680)
@@ -707,7 +707,7 @@
   integer, dimension(:), allocatable :: valence_external_mesh
   integer :: ispec,i,j,k,iglob,ier,count
   real(kind=CUSTOM_REAL),parameter :: TOLERANCE_DISTANCE = 0.9
- 
+
 ! detecting surface points/elements (based on valence check on NGLL points) for external mesh
   allocate(valence_external_mesh(nglob),stat=ier)
   if( ier /= 0 ) stop 'error allocate valence array'
@@ -724,7 +724,7 @@
                   + (zstore(ibool(1,1,1,:)) - zstore(ibool(2,1,1,:)))**2 )
   mindist = sqrt(mindist)
   distance = TOLERANCE_DISTANCE*mindist
-  
+
 ! sets valence value to one corresponding to process rank  for points on cross-sections
   do ispec = 1, nspec
     do k = 1, NGLLZ

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/serial.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/serial.f90	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/serial.f90	2012-02-25 01:37:33 UTC (rev 19680)
@@ -41,13 +41,13 @@
 
   implicit none
   real :: ct
-    
+
   ! note: for simplicity, we take cpu_time which returns the elapsed CPU time in seconds
   !          (instead of wall clock time for parallel MPI function)
   call cpu_time(ct)
-  
-  wtime = ct  
 
+  wtime = ct
+
   end function wtime
 
 !

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/Makefile.in
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/Makefile.in	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/Makefile.in	2012-02-25 01:37:33 UTC (rev 19680)
@@ -37,24 +37,32 @@
 #CUDA_LIBS= -L/u/dpeter/install/cuda/lib64 -lcudart -lcublas
 #MPI_INC= -I/usr/local/openmpi/1.4.3/gcc/x86_64/include
 
-# MR added CUDA
+# CUDA
+# with configure: ./configure --with-cuda CUDA_LIB=.. CUDA_INC=.. MPI_INC=..
 
+# default cuda libraries
 @COND_CUDA_TRUE at CUDA_LIBS = -lcuda -lcudart -lcublas
 @COND_CUDA_FALSE at CUDA_LIBS = 
 
-# with configure: ./configure CUDA_LIB=.. CUDA_INC=.. MPI_INC=..
 CUDA_LIB_LOCATION = @CUDA_LIB@
 CUDA_LINK = $(CUDA_LIB_LOCATION) $(CUDA_LIBS)
 CUDA_INC = @CUDA_INC@ -I../../
 MPI_INC = @MPI_INC@
 
 @COND_CUDA_TRUE at NVCC = nvcc
- at COND_CUDA_FALSE@NVCC = g++
+ at COND_CUDA_FALSE@NVCC = @CC@
 
 @COND_CUDA_TRUE at NVCC_FLAGS = $(CUDA_INC) $(MPI_INC) $(COND_MPI_CPPFLAGS) -DCUDA -gencode=arch=compute_20,code=sm_20
 @COND_CUDA_FALSE at NVCC_FLAGS = $(MPI_INC) $(COND_MPI_CPPFLAGS)
 
+# OpenMP
+# with configure: ./configure --with-openmp FLAGS_NO_CHECK="-openmp .." OPENMP_LIB=.. 
+ at COND_OPENMP_TRUE@OPENMP_LIBS = $(OPENMP_LIB)
+ at COND_OPENMP_FALSE@OPENMP_LIBS = 
+ at COND_OPENMP_TRUE@COND_OPENMP_FFLAGS = -DOPENMP_MODE
+ at COND_OPENMP_FALSE@COND_OPENMP_FFLAGS =
 
+
 FC = @FC@
 FCFLAGS = #@FCFLAGS@
 MPIFC = @MPIFC@
@@ -76,10 +84,10 @@
 # Output files directory
 OUTPUT=../../in_out_files/OUTPUT_FILES
 
-FCCOMPILE_CHECK =@FCENV@ ${FC} ${FCFLAGS} $(FLAGS_CHECK) -I$(SHARED)
-FCCOMPILE_NO_CHECK =@FCENV@ ${FC} ${FCFLAGS} $(FLAGS_NO_CHECK) -I$(SHARED)
-MPIFCCOMPILE_CHECK =@FCENV@ ${MPIFC} ${FCFLAGS} $(FLAGS_CHECK) -I$(SHARED)
-MPIFCCOMPILE_NO_CHECK =@FCENV@ ${MPIFC} ${FCFLAGS} $(FLAGS_NO_CHECK) -I$(SHARED)
+FCCOMPILE_CHECK =@FCENV@ ${FC} ${FCFLAGS} $(FLAGS_CHECK) $(COND_OPENMP_FFLAGS) -I$(SHARED)
+FCCOMPILE_NO_CHECK =@FCENV@ ${FC} ${FCFLAGS} $(FLAGS_NO_CHECK) $(COND_OPENMP_FFLAGS) -I$(SHARED)
+MPIFCCOMPILE_CHECK =@FCENV@ ${MPIFC} ${FCFLAGS} $(FLAGS_CHECK) $(COND_OPENMP_FFLAGS) -I$(SHARED)
+MPIFCCOMPILE_NO_CHECK =@FCENV@ ${MPIFC} ${FCFLAGS} $(FLAGS_NO_CHECK) $(COND_OPENMP_FFLAGS) -I$(SHARED)
 @COND_MPI_TRUE at FCLINK = $(MPIFCCOMPILE_NO_CHECK)
 @COND_MPI_FALSE at FCLINK = $(FCCOMPILE_NO_CHECK)
 
@@ -164,7 +172,6 @@
 	$O/compute_forces_elastic.o \
 	$O/compute_forces_elastic_Dev.o \
 	$O/compute_forces_elastic_noDev.o \
-	$O/compute_forces_elastic_Dev_openmp.o \
 	$O/compute_add_sources_acoustic.o \
 	$O/compute_add_sources_elastic.o \
 	$O/compute_coupling_acoustic_el.o \
@@ -198,6 +205,10 @@
 @COND_MPI_TRUE at COND_MPI_OBJECTS = $O/parallel.o
 @COND_MPI_FALSE at COND_MPI_OBJECTS = $O/serial.o
 
+# objects toggled between openmp and non-openmp version
+ at COND_OPENMP_TRUE@COND_OPENMP_OBJECTS = $O/compute_forces_elastic_Dev_openmp.openmp.o
+ at COND_OPENMP_FALSE@COND_OPENMP_OBJECTS =
+
 LIBSPECFEM = $L/libspecfem.a
 
 # objects for the pure Fortran version
@@ -231,8 +242,8 @@
 
 # rules for the pure Fortran version
 @COND_PYRE_FALSE@# solver also depends on values from mesher
- at COND_PYRE_FALSE@xspecfem3D: $(XSPECFEM_OBJECTS) $(COND_MPI_OBJECTS)
- at COND_PYRE_FALSE@	${FCLINK} -o ${E}/xspecfem3D $(XSPECFEM_OBJECTS) $(COND_MPI_OBJECTS) $(MPILIBS) $(CUDA_LINK)
+ at COND_PYRE_FALSE@xspecfem3D: $(XSPECFEM_OBJECTS) $(COND_MPI_OBJECTS) $(COND_OPENMP_OBJECTS)
+ at COND_PYRE_FALSE@	${FCLINK} -o ${E}/xspecfem3D $(XSPECFEM_OBJECTS) $(COND_MPI_OBJECTS) $(MPILIBS) $(COND_OPENMP_OBJECTS) $(OPENMP_LIBS) $(CUDA_LINK)
 @COND_PYRE_FALSE@
 
 convolve_source_timefunction: xconvolve_source_timefunction
@@ -283,10 +294,22 @@
 $O/%.o: %.f90 $(SHARED)constants.h
 	${FCCOMPILE_NO_CHECK} -c -o $@ $<
 
+$O/%.o: %.F90 $(SHARED)constants.h
+	${FCCOMPILE_NO_CHECK} -c -o $@ $<
+
 $O/%.shared.o: ${SHARED}%.f90 $(SHARED)constants.h
 	${FCCOMPILE_NO_CHECK} -c -o $@ $<
 
+$O/%.shared.o: ${SHARED}%.F90 $(SHARED)constants.h
+	${FCCOMPILE_NO_CHECK} -c -o $@ $<
+
 ###
+### OpenMP compilation
+###
+$O/%.openmp.o: %.f90 $(SHARED)constants.h
+	${FCCOMPILE_NO_CHECK} -c -o $@ $<
+
+###
 ### CUDA compilation
 ###
 $O/%.cuda.o: ${CUDAD}%.cu ../../config.h $(CUDAD)mesh_constants_cuda.h $(CUDAD)prepare_constants_cuda.h

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_elastic.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_elastic.f90	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_add_sources_elastic.f90	2012-02-25 01:37:33 UTC (rev 19680)
@@ -259,64 +259,64 @@
         if( ier /= 0 ) stop 'error allocating array adj_sourcearray'
 
         if (.not. SU_FORMAT) then
-      !!! read ascii adjoint sources
-           irec_local = 0
-           do irec = 1, nrec
-              ! compute source arrays
-              if (myrank == islice_selected_rec(irec)) then
-                 irec_local = irec_local + 1
-                 ! reads in **sta**.**net**.**LH**.adj files
-                 adj_source_file = trim(station_name(irec))//'.'//trim(network_name(irec))
-                 call compute_arrays_adjoint_source(myrank,adj_source_file, &
-                      xi_receiver(irec),eta_receiver(irec),gamma_receiver(irec), &
-                      adj_sourcearray, xigll,yigll,zigll, &
-                      it_sub_adj,NSTEP,NTSTEP_BETWEEN_READ_ADJSRC)
-                 do itime = 1,NTSTEP_BETWEEN_READ_ADJSRC
-                    adj_sourcearrays(irec_local,itime,:,:,:,:) = adj_sourcearray(itime,:,:,:,:)
-                 enddo
+          !!! read ascii adjoint sources
+          irec_local = 0
+          do irec = 1, nrec
+            ! compute source arrays
+            if (myrank == islice_selected_rec(irec)) then
+               irec_local = irec_local + 1
+               ! reads in **sta**.**net**.**LH**.adj files
+               adj_source_file = trim(station_name(irec))//'.'//trim(network_name(irec))
+               call compute_arrays_adjoint_source(myrank,adj_source_file, &
+                    xi_receiver(irec),eta_receiver(irec),gamma_receiver(irec), &
+                    adj_sourcearray, xigll,yigll,zigll, &
+                    it_sub_adj,NSTEP,NTSTEP_BETWEEN_READ_ADJSRC)
+               do itime = 1,NTSTEP_BETWEEN_READ_ADJSRC
+                  adj_sourcearrays(irec_local,itime,:,:,:,:) = adj_sourcearray(itime,:,:,:,:)
+               enddo
 
-              endif
-           enddo
+            endif
+          enddo
         else
-      !!! read SU adjoint sources
-           ! range of the block we need to read
-           it_start = NSTEP - it_sub_adj*NTSTEP_BETWEEN_READ_ADJSRC + 1
-           it_end   = it_start + NTSTEP_BETWEEN_READ_ADJSRC - 1
-           write(procname,"(i4)") myrank
-           ! read adjoint sources
-           open(unit=IIN_SU1, file=trim(adjustl(OUTPUT_FILES_PATH))//'../SEM/'//trim(adjustl(procname))//'_dx_SU.adj', &
-                access='direct',recl=240+4*NSTEP)
-           open(unit=IIN_SU2, file=trim(adjustl(OUTPUT_FILES_PATH))//'../SEM/'//trim(adjustl(procname))//'_dy_SU.adj', &
-                access='direct',recl=240+4*NSTEP)
-           open(unit=IIN_SU3, file=trim(adjustl(OUTPUT_FILES_PATH))//'../SEM/'//trim(adjustl(procname))//'_dz_SU.adj', &
-                access='direct',recl=240+4*NSTEP)
-           do irec_local = 1,nrec_local
-              irec = number_receiver_global(irec_local)
-              read(IIN_SU1,rec=irec_local) r4head, adj_temp
-              adj_src(:,1)=adj_temp(it_start:it_end)
-              read(IIN_SU2,rec=irec_local) r4head, adj_temp
-              adj_src(:,2)=adj_temp(it_start:it_end)
-              read(IIN_SU3,rec=irec_local) r4head, adj_temp
-              adj_src(:,3)=adj_temp(it_start:it_end)
-              ! lagrange interpolators for receiver location
-              call lagrange_any(xi_receiver(irec),NGLLX,xigll,hxir,hpxir)
-              call lagrange_any(eta_receiver(irec),NGLLY,yigll,hetar,hpetar)
-              call lagrange_any(gamma_receiver(irec),NGLLZ,zigll,hgammar,hpgammar)
-              ! interpolates adjoint source onto GLL points within this element
-              do k = 1, NGLLZ
-                 do j = 1, NGLLY
-                    do i = 1, NGLLX
-                       adj_sourcearray(:,:,i,j,k) = hxir(i) * hetar(j) * hgammar(k) * adj_src(:,:)
-                    enddo
-                 enddo
-              enddo
-              do itime = 1,NTSTEP_BETWEEN_READ_ADJSRC
-                 adj_sourcearrays(irec_local,itime,:,:,:,:) = adj_sourcearray(itime,:,:,:,:)
-              enddo
-           enddo
-           close(IIN_SU1)
-           close(IIN_SU2)
-           close(IIN_SU3)
+          !!! read SU adjoint sources
+          ! range of the block we need to read
+          it_start = NSTEP - it_sub_adj*NTSTEP_BETWEEN_READ_ADJSRC + 1
+          it_end   = it_start + NTSTEP_BETWEEN_READ_ADJSRC - 1
+          write(procname,"(i4)") myrank
+          ! read adjoint sources
+          open(unit=IIN_SU1, file=trim(adjustl(OUTPUT_FILES_PATH))//'../SEM/'//trim(adjustl(procname))//'_dx_SU.adj', &
+              access='direct',recl=240+4*NSTEP)
+          open(unit=IIN_SU2, file=trim(adjustl(OUTPUT_FILES_PATH))//'../SEM/'//trim(adjustl(procname))//'_dy_SU.adj', &
+              access='direct',recl=240+4*NSTEP)
+          open(unit=IIN_SU3, file=trim(adjustl(OUTPUT_FILES_PATH))//'../SEM/'//trim(adjustl(procname))//'_dz_SU.adj', &
+              access='direct',recl=240+4*NSTEP)
+          do irec_local = 1,nrec_local
+            irec = number_receiver_global(irec_local)
+            read(IIN_SU1,rec=irec_local) r4head, adj_temp
+            adj_src(:,1)=adj_temp(it_start:it_end)
+            read(IIN_SU2,rec=irec_local) r4head, adj_temp
+            adj_src(:,2)=adj_temp(it_start:it_end)
+            read(IIN_SU3,rec=irec_local) r4head, adj_temp
+            adj_src(:,3)=adj_temp(it_start:it_end)
+            ! lagrange interpolators for receiver location
+            call lagrange_any(xi_receiver(irec),NGLLX,xigll,hxir,hpxir)
+            call lagrange_any(eta_receiver(irec),NGLLY,yigll,hetar,hpetar)
+            call lagrange_any(gamma_receiver(irec),NGLLZ,zigll,hgammar,hpgammar)
+            ! interpolates adjoint source onto GLL points within this element
+            do k = 1, NGLLZ
+               do j = 1, NGLLY
+                  do i = 1, NGLLX
+                     adj_sourcearray(:,:,i,j,k) = hxir(i) * hetar(j) * hgammar(k) * adj_src(:,:)
+                  enddo
+               enddo
+            enddo
+            do itime = 1,NTSTEP_BETWEEN_READ_ADJSRC
+               adj_sourcearrays(irec_local,itime,:,:,:,:) = adj_sourcearray(itime,:,:,:,:)
+            enddo
+          enddo
+          close(IIN_SU1)
+          close(IIN_SU2)
+          close(IIN_SU3)
         endif !if(.not. SU_FORMAT)
 
         deallocate(adj_sourcearray)

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_acoustic.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_acoustic.f90	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_acoustic.f90	2012-02-25 01:37:33 UTC (rev 19680)
@@ -153,7 +153,7 @@
     endif
 
 
-   if(PML) then
+    if(PML) then
       ! transfers potentials to CPU
       if(GPU_MODE) call transfer_fields_ac_from_device(NGLOB_AB,potential_acoustic, &
                               potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
@@ -186,9 +186,10 @@
 
     ! absorbing boundaries
     if(ABSORBING_CONDITIONS) then
-      if( PML .and. PML_USE_SOMMERFELD ) then
-        ! adds a Sommerfeld condition on the domain's absorbing boundaries
-        call PML_acoustic_abs_boundaries(phase_is_inner,NSPEC_AB,NGLOB_AB,&
+      if( PML ) then
+        if( PML_USE_SOMMERFELD ) then
+          ! adds a Sommerfeld condition on the domain's absorbing boundaries
+          call PML_acoustic_abs_boundaries(phase_is_inner,NSPEC_AB,NGLOB_AB,&
                         abs_boundary_jacobian2Dw,abs_boundary_ijk,abs_boundary_ispec, &
                         num_abs_boundary_faces, &
                         kappastore,ibool,ispec_is_inner, &
@@ -197,10 +198,10 @@
                         num_PML_ispec,PML_ispec,ispec_is_PML_inum,&
                         chi1_dot,chi2_t,chi2_t_dot,chi3_dot,chi4_dot,&
                         chi1_dot_dot,chi3_dot_dot,chi4_dot_dot)
-        ! transfers potentials back to GPU
-        if(GPU_MODE) call transfer_fields_ac_to_device(NGLOB_AB,potential_acoustic, &
+          ! transfers potentials back to GPU
+          if(GPU_MODE) call transfer_fields_ac_to_device(NGLOB_AB,potential_acoustic, &
                               potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
-
+        endif
       else
        ! Stacey boundary conditions
         call compute_stacey_acoustic(NSPEC_AB,NGLOB_AB, &

Copied: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90 (from rev 19670, seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.f90)
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90	                        (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90	2012-02-25 01:37:33 UTC (rev 19680)
@@ -0,0 +1,855 @@
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+! elastic solver
+
+subroutine compute_forces_elastic()
+
+  use specfem_par
+  use specfem_par_acoustic
+  use specfem_par_elastic
+  use specfem_par_poroelastic
+
+  implicit none
+
+  integer:: iphase
+  logical:: phase_is_inner
+
+! distinguishes two runs: for points on MPI interfaces, and points within the partitions
+  do iphase=1,2
+
+    !first for points on MPI interfaces
+    if( iphase == 1 ) then
+      phase_is_inner = .false.
+    else
+      phase_is_inner = .true.
+    endif
+
+
+! elastic term
+    if( .NOT. GPU_MODE ) then
+      if(USE_DEVILLE_PRODUCTS) then
+        ! uses Deville (2002) optimizations
+        call compute_forces_elastic_Dev_sim1(iphase)
+
+        ! adjoint simulations: backward/reconstructed wavefield
+        if( SIMULATION_TYPE == 3 ) &
+          call compute_forces_elastic_Dev_sim3(iphase)
+
+      else
+        ! no optimizations used
+        call compute_forces_elastic_noDev( iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                        hprime_xx,hprime_yy,hprime_zz, &
+                        hprimewgll_xx,hprimewgll_yy,hprimewgll_zz,&
+                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                        kappastore,mustore,jacobian,ibool, &
+                        ATTENUATION,&
+                        one_minus_sum_beta,factor_common, &
+                        alphaval,betaval,gammaval,&
+                        NSPEC_ATTENUATION_AB, &
+                        R_xx,R_yy,R_xy,R_xz,R_yz, &
+                        epsilondev_xx,epsilondev_yy,epsilondev_xy,&
+                        epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+                        ANISOTROPY,NSPEC_ANISO, &
+                        c11store,c12store,c13store,c14store,c15store,c16store,&
+                        c22store,c23store,c24store,c25store,c26store,c33store,&
+                        c34store,c35store,c36store,c44store,c45store,c46store,&
+                        c55store,c56store,c66store, &
+                        SIMULATION_TYPE,COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+                        NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+                        is_moho_top,is_moho_bot, &
+                        dsdx_top,dsdx_bot, &
+                        ispec2D_moho_top,ispec2D_moho_bot, &
+                        num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+                        phase_ispec_inner_elastic  )
+
+        ! adjoint simulations: backward/reconstructed wavefield
+        if( SIMULATION_TYPE == 3 ) &
+          call compute_forces_elastic_noDev( iphase, NSPEC_AB,NGLOB_AB,&
+                        b_displ,b_accel, &
+                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                        hprime_xx,hprime_yy,hprime_zz, &
+                        hprimewgll_xx,hprimewgll_yy,hprimewgll_zz,&
+                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                        kappastore,mustore,jacobian,ibool, &
+                        ATTENUATION,&
+                        one_minus_sum_beta,factor_common, &
+                        b_alphaval,b_betaval,b_gammaval,&
+                        NSPEC_ATTENUATION_AB, &
+                        b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+                        b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy,&
+                        b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
+                        ANISOTROPY,NSPEC_ANISO, &
+                        c11store,c12store,c13store,c14store,c15store,c16store,&
+                        c22store,c23store,c24store,c25store,c26store,c33store,&
+                        c34store,c35store,c36store,c44store,c45store,c46store,&
+                        c55store,c56store,c66store, &
+                        SIMULATION_TYPE,COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+                        NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+                        is_moho_top,is_moho_bot, &
+                        b_dsdx_top,b_dsdx_bot, &
+                        ispec2D_moho_top,ispec2D_moho_bot, &
+                        num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+                        phase_ispec_inner_elastic  )
+
+      endif
+
+    else
+      ! on GPU
+      ! contains both forward SIM_TYPE==1 and backward SIM_TYPE==3 simulations
+      call compute_forces_elastic_cuda(Mesh_pointer, iphase, &
+                                      nspec_outer_elastic, &
+                                      nspec_inner_elastic, &
+                                      SIMULATION_TYPE, &
+                                      COMPUTE_AND_STORE_STRAIN,ATTENUATION,ANISOTROPY)
+    endif ! GPU_MODE
+
+
+! adds elastic absorbing boundary term to acceleration (Stacey conditions)
+    if(ABSORBING_CONDITIONS) &
+      call compute_stacey_elastic(NSPEC_AB,NGLOB_AB,accel, &
+                        ibool,ispec_is_inner,phase_is_inner, &
+                        abs_boundary_normal,abs_boundary_jacobian2Dw, &
+                        abs_boundary_ijk,abs_boundary_ispec, &
+                        num_abs_boundary_faces, &
+                        veloc,rho_vp,rho_vs, &
+                        ispec_is_elastic,SIMULATION_TYPE,SAVE_FORWARD, &
+                        NSTEP,it,NGLOB_ADJOINT,b_accel, &
+                        b_num_abs_boundary_faces,b_reclen_field,b_absorb_field,&
+                        GPU_MODE,Mesh_pointer)
+
+! acoustic coupling
+    if( ACOUSTIC_SIMULATION ) then
+      if( .NOT. GPU_MODE ) then
+        call compute_coupling_elastic_ac(NSPEC_AB,NGLOB_AB, &
+                        ibool,accel,potential_dot_dot_acoustic, &
+                        num_coupling_ac_el_faces, &
+                        coupling_ac_el_ispec,coupling_ac_el_ijk, &
+                        coupling_ac_el_normal, &
+                        coupling_ac_el_jacobian2Dw, &
+                        ispec_is_inner,phase_is_inner)
+
+        ! adjoint simulations
+        if( SIMULATION_TYPE == 3 ) &
+          call compute_coupling_elastic_ac(NSPEC_ADJOINT,NGLOB_ADJOINT, &
+                        ibool,b_accel,b_potential_dot_dot_acoustic, &
+                        num_coupling_ac_el_faces, &
+                        coupling_ac_el_ispec,coupling_ac_el_ijk, &
+                        coupling_ac_el_normal, &
+                        coupling_ac_el_jacobian2Dw, &
+                        ispec_is_inner,phase_is_inner)
+      else
+        ! on GPU
+        if( num_coupling_ac_el_faces > 0 ) &
+          call compute_coupling_el_ac_cuda(Mesh_pointer,phase_is_inner, &
+                                              num_coupling_ac_el_faces,SIMULATION_TYPE)
+
+      endif
+    endif
+
+
+! poroelastic coupling
+! not implemented yet
+!    if( POROELASTIC_SIMULATION ) &
+!      call compute_coupling_elastic_poro()
+
+! adds source term (single-force/moment-tensor solution)
+    call compute_add_sources_elastic( NSPEC_AB,NGLOB_AB,accel, &
+                        ibool,ispec_is_inner,phase_is_inner, &
+                        NSOURCES,myrank,it,islice_selected_source,ispec_selected_source,&
+                        xi_source,eta_source,gamma_source,nu_source, &
+                        hdur,hdur_gaussian,tshift_cmt,dt,t0,sourcearrays, &
+                        ispec_is_elastic,SIMULATION_TYPE,NSTEP,NGLOB_ADJOINT, &
+                        nrec,islice_selected_rec,ispec_selected_rec, &
+                        nadj_rec_local,adj_sourcearrays,b_accel, &
+                        NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY, &
+                        GPU_MODE, Mesh_pointer )
+
+    ! assemble all the contributions between slices using MPI
+    if( phase_is_inner .eqv. .false. ) then
+       ! sends accel values to corresponding MPI interface neighbors
+       if(.NOT. GPU_MODE) then
+          call assemble_MPI_vector_ext_mesh_s(NPROC,NGLOB_AB,accel, &
+               buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
+               num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+               nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
+               my_neighbours_ext_mesh, &
+               request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
+       else ! GPU_MODE==1
+          call transfer_boun_accel_from_device(NGLOB_AB*NDIM, Mesh_pointer, accel,&
+                        buffer_send_vector_ext_mesh,&
+                        num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh,&
+                        nibool_interfaces_ext_mesh, ibool_interfaces_ext_mesh,1) ! <-- 1 == fwd accel
+          call assemble_MPI_vector_send_cuda(NPROC, &
+               buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
+               num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+               nibool_interfaces_ext_mesh,&
+               my_neighbours_ext_mesh, &
+               request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
+       endif ! GPU_MODE
+
+       ! adjoint simulations
+       if( SIMULATION_TYPE == 3 ) then
+          if(.NOT. GPU_MODE) then
+             call assemble_MPI_vector_ext_mesh_s(NPROC,NGLOB_ADJOINT,b_accel, &
+                  b_buffer_send_vector_ext_mesh,b_buffer_recv_vector_ext_mesh, &
+                  num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+                  nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
+                  my_neighbours_ext_mesh, &
+                  b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh)
+          else ! GPU_MODE == 1
+             call transfer_boun_accel_from_device(NGLOB_AB*NDIM, Mesh_pointer, b_accel,&
+                       b_buffer_send_vector_ext_mesh,&
+                       num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh,&
+                       nibool_interfaces_ext_mesh, ibool_interfaces_ext_mesh,3) ! <-- 3 == adjoint b_accel
+             call assemble_MPI_vector_send_cuda(NPROC, &
+                  b_buffer_send_vector_ext_mesh,b_buffer_recv_vector_ext_mesh, &
+                  num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+                  nibool_interfaces_ext_mesh,&
+                  my_neighbours_ext_mesh, &
+                  b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh)
+
+          endif ! GPU
+       endif !adjoint
+
+    else
+      ! waits for send/receive requests to be completed and assembles values
+      if(.NOT. GPU_MODE) then
+         call assemble_MPI_vector_ext_mesh_w(NPROC,NGLOB_AB,accel, &
+                            buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
+                            max_nibool_interfaces_ext_mesh, &
+                            nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+                            request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
+      else ! GPU_MODE == 1
+         call assemble_MPI_vector_write_cuda(NPROC,NGLOB_AB,accel, Mesh_pointer,&
+                            buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
+                            max_nibool_interfaces_ext_mesh, &
+                            nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+                            request_send_vector_ext_mesh,request_recv_vector_ext_mesh,1)
+      endif
+      ! adjoint simulations
+      if( SIMULATION_TYPE == 3 ) then
+         if(.NOT. GPU_MODE) then
+            call assemble_MPI_vector_ext_mesh_w(NPROC,NGLOB_ADJOINT,b_accel, &
+                             b_buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
+                             max_nibool_interfaces_ext_mesh, &
+                             nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+                             b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh)
+         else ! GPU_MODE == 1
+            call assemble_MPI_vector_write_cuda(NPROC,NGLOB_AB,b_accel, Mesh_pointer,&
+                              b_buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
+                              max_nibool_interfaces_ext_mesh, &
+                              nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+                              b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh,3)
+         endif
+      endif !adjoint
+
+    endif
+
+    !! DK DK May 2009: removed this because now each slice of a CUBIT + SCOTCH mesh
+    !! DK DK May 2009: has a different number of spectral elements and therefore
+    !! DK DK May 2009: only the general non-blocking MPI routines assemble_MPI_vector_ext_mesh_s
+    !! DK DK May 2009: and assemble_MPI_vector_ext_mesh_w above can be used.
+    !! DK DK May 2009: For adjoint runs below (SIMULATION_TYPE == 3) they should be used as well.
+
+ enddo
+
+ ! multiplies with inverse of mass matrix (note: rmass has been inverted already)
+ if(.NOT. GPU_MODE) then
+    accel(1,:) = accel(1,:)*rmass(:)
+    accel(2,:) = accel(2,:)*rmass(:)
+    accel(3,:) = accel(3,:)*rmass(:)
+    ! adjoint simulations
+    if (SIMULATION_TYPE == 3) then
+       b_accel(1,:) = b_accel(1,:)*rmass(:)
+       b_accel(2,:) = b_accel(2,:)*rmass(:)
+       b_accel(3,:) = b_accel(3,:)*rmass(:)
+    endif !adjoint
+ else ! GPU_MODE == 1
+    call kernel_3_a_cuda(Mesh_pointer, NGLOB_AB, deltatover2,SIMULATION_TYPE,b_deltatover2,OCEANS)
+ endif
+
+! updates acceleration with ocean load term
+  if(OCEANS) then
+    if( .NOT. GPU_MODE ) then
+      call elastic_ocean_load(NSPEC_AB,NGLOB_AB, &
+                        ibool,rmass,rmass_ocean_load,accel, &
+                        free_surface_normal,free_surface_ijk,free_surface_ispec, &
+                        num_free_surface_faces,SIMULATION_TYPE, &
+                        NGLOB_ADJOINT,b_accel)
+    else
+      ! on GPU
+      call elastic_ocean_load_cuda(Mesh_pointer,SIMULATION_TYPE)
+    endif
+  endif
+
+! updates velocities
+! Newmark finite-difference time scheme with elastic domains:
+! (see e.g. Hughes, 1987; Chaljub et al., 2003)
+!
+! u(t+delta_t) = u(t) + delta_t  v(t) + 1/2  delta_t**2 a(t)
+! v(t+delta_t) = v(t) + 1/2 delta_t a(t) + 1/2 delta_t a(t+delta_t)
+! a(t+delta_t) = 1/M_elastic ( -K_elastic u(t+delta) + B_elastic chi_dot_dot(t+delta_t) + f( t+delta_t) )
+!
+! where
+!   u, v, a are displacement,velocity & acceleration
+!   M is mass matrix, K stiffness matrix and B boundary term for acoustic/elastic domains
+!   f denotes a source term (acoustic/elastic)
+!   chi_dot_dot is acoustic (fluid) potential ( dotted twice with respect to time)
+!
+! corrector:
+!   updates the velocity term which requires a(t+delta)
+! GPU_MODE: this is handled in 'kernel_3' at the same time as accel*rmass
+  if(.NOT. GPU_MODE) then
+     veloc(:,:) = veloc(:,:) + deltatover2*accel(:,:)
+     ! adjoint simulations
+     if (SIMULATION_TYPE == 3) b_veloc(:,:) = b_veloc(:,:) + b_deltatover2*b_accel(:,:)
+  else ! GPU_MODE == 1
+    if( OCEANS ) call kernel_3_b_cuda(Mesh_pointer, NGLOB_AB, deltatover2,SIMULATION_TYPE,b_deltatover2)
+  endif
+
+
+end subroutine compute_forces_elastic
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+subroutine elastic_ocean_load(NSPEC_AB,NGLOB_AB, &
+                        ibool,rmass,rmass_ocean_load,accel, &
+                        free_surface_normal,free_surface_ijk,free_surface_ispec, &
+                        num_free_surface_faces,SIMULATION_TYPE, &
+                        NGLOB_ADJOINT,b_accel)
+
+! updates acceleration with ocean load term:
+! approximates ocean-bottom continuity of pressure & displacement for longer period waves (> ~20s ),
+! assuming incompressible fluid column above bathymetry ocean bottom
+
+  implicit none
+
+  include 'constants.h'
+
+  integer :: NSPEC_AB,NGLOB_AB
+
+  real(kind=CUSTOM_REAL),dimension(NDIM,NGLOB_AB),intent(inout) :: accel
+  real(kind=CUSTOM_REAL),dimension(NGLOB_AB),intent(in) :: rmass,rmass_ocean_load
+
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB),intent(in) :: ibool
+
+  ! free surface
+  integer :: num_free_surface_faces
+  real(kind=CUSTOM_REAL) :: free_surface_normal(NDIM,NGLLSQUARE,num_free_surface_faces)
+  integer :: free_surface_ijk(3,NGLLSQUARE,num_free_surface_faces)
+  integer :: free_surface_ispec(num_free_surface_faces)
+
+  ! adjoint simulations
+  integer :: SIMULATION_TYPE,NGLOB_ADJOINT
+  real(kind=CUSTOM_REAL),dimension(NDIM,NGLOB_ADJOINT):: b_accel
+
+! local parameters
+  real(kind=CUSTOM_REAL) :: nx,ny,nz
+  real(kind=CUSTOM_REAL) :: additional_term,force_normal_comp
+  integer :: i,j,k,ispec,iglob
+  integer :: igll,iface
+  logical,dimension(NGLOB_AB) :: updated_dof_ocean_load
+  ! adjoint locals
+  real(kind=CUSTOM_REAL) :: b_additional_term,b_force_normal_comp
+
+  !   initialize the updates
+  updated_dof_ocean_load(:) = .false.
+
+  ! for surface elements exactly at the top of the model (ocean bottom)
+  do iface = 1,num_free_surface_faces
+
+    ispec = free_surface_ispec(iface)
+    do igll = 1, NGLLSQUARE
+      i = free_surface_ijk(1,igll,iface)
+      j = free_surface_ijk(2,igll,iface)
+      k = free_surface_ijk(3,igll,iface)
+
+      ! get global point number
+      iglob = ibool(i,j,k,ispec)
+
+      ! only update once
+      if(.not. updated_dof_ocean_load(iglob)) then
+
+        ! get normal
+        nx = free_surface_normal(1,igll,iface)
+        ny = free_surface_normal(2,igll,iface)
+        nz = free_surface_normal(3,igll,iface)
+
+        ! make updated component of right-hand side
+        ! we divide by rmass() which is 1 / M
+        ! we use the total force which includes the Coriolis term above
+        force_normal_comp = ( accel(1,iglob)*nx + &
+                              accel(2,iglob)*ny + &
+                              accel(3,iglob)*nz ) / rmass(iglob)
+
+        additional_term = (rmass_ocean_load(iglob) - rmass(iglob)) * force_normal_comp
+
+        accel(1,iglob) = accel(1,iglob) + additional_term * nx
+        accel(2,iglob) = accel(2,iglob) + additional_term * ny
+        accel(3,iglob) = accel(3,iglob) + additional_term * nz
+
+        ! adjoint simulations
+        if (SIMULATION_TYPE == 3) then
+          b_force_normal_comp = ( b_accel(1,iglob)*nx + &
+                                  b_accel(2,iglob)*ny + &
+                                  b_accel(3,iglob)*nz) / rmass(iglob)
+          b_additional_term = (rmass_ocean_load(iglob) - rmass(iglob)) * b_force_normal_comp
+
+          b_accel(1,iglob) = b_accel(1,iglob) + b_additional_term * nx
+          b_accel(2,iglob) = b_accel(2,iglob) + b_additional_term * ny
+          b_accel(3,iglob) = b_accel(3,iglob) + b_additional_term * nz
+        endif !adjoint
+
+        ! done with this point
+        updated_dof_ocean_load(iglob) = .true.
+
+      endif
+
+    enddo ! igll
+  enddo ! iface
+
+end subroutine elastic_ocean_load
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+! distributes routines according to chosen NGLLX in constants.h
+
+!daniel: note -- i put it here rather than in compute_forces_elastic_Dev.f90 because compiler complains that:
+! " The storage extent of the dummy argument exceeds that of the actual argument. "
+
+subroutine compute_forces_elastic_Dev_sim1(iphase)
+
+! forward simulations
+
+  use specfem_par
+  use specfem_par_acoustic
+  use specfem_par_elastic
+  use specfem_par_poroelastic
+
+  implicit none
+
+  integer,intent(in) :: iphase
+
+  select case(NGLLX)
+
+  case (5)
+
+!----------------------------------------------------------------------------------------------
+
+! OpenMP routine flag for testing & benchmarking forward runs only
+! configure additional flag, e.g.: FLAGS_NO_CHECK="-O3 -DOPENMP_MODE -openmp"
+
+!----------------------------------------------------------------------------------------------
+#ifdef OPENMP_MODE
+    call compute_forces_elastic_Dev_openmp(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+           xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+           hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+           wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+           kappastore,mustore,jacobian,ibool, &
+           ATTENUATION, &
+           one_minus_sum_beta,factor_common, &
+           alphaval,betaval,gammaval, &
+           NSPEC_ATTENUATION_AB, &
+           R_xx,R_yy,R_xy,R_xz,R_yz, &
+           epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+           epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+           ANISOTROPY,NSPEC_ANISO, &
+           c11store,c12store,c13store,c14store,c15store,c16store,&
+           c22store,c23store,c24store,c25store,c26store,c33store,&
+           c34store,c35store,c36store,c44store,c45store,c46store,&
+           c55store,c56store,c66store, &
+           SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+           NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+           is_moho_top,is_moho_bot, &
+           dsdx_top,dsdx_bot, &
+           ispec2D_moho_top,ispec2D_moho_bot, &
+           num_phase_ispec_elastic,&
+           phase_ispec_inner_elastic,&
+           num_colors_outer_elastic,num_colors_inner_elastic)
+#else
+    call compute_forces_elastic_Dev_5p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+             xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+             hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+             wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+             kappastore,mustore,jacobian,ibool, &
+             ATTENUATION, &
+             one_minus_sum_beta,factor_common, &
+             alphaval,betaval,gammaval, &
+             NSPEC_ATTENUATION_AB, &
+             R_xx,R_yy,R_xy,R_xz,R_yz, &
+             epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+             epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+             ANISOTROPY,NSPEC_ANISO, &
+             c11store,c12store,c13store,c14store,c15store,c16store,&
+             c22store,c23store,c24store,c25store,c26store,c33store,&
+             c34store,c35store,c36store,c44store,c45store,c46store,&
+             c55store,c56store,c66store, &
+             SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+             NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+             is_moho_top,is_moho_bot, &
+             dsdx_top,dsdx_bot, &
+             ispec2D_moho_top,ispec2D_moho_bot, &
+             num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+             phase_ispec_inner_elastic )
+#endif
+
+  case (6)
+    call compute_forces_elastic_Dev_6p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                    hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+                    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                    kappastore,mustore,jacobian,ibool, &
+                    ATTENUATION, &
+                    one_minus_sum_beta,factor_common, &
+                    alphaval,betaval,gammaval, &
+                    NSPEC_ATTENUATION_AB, &
+                    R_xx,R_yy,R_xy,R_xz,R_yz, &
+                    epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+                    epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+                    ANISOTROPY,NSPEC_ANISO, &
+                    c11store,c12store,c13store,c14store,c15store,c16store,&
+                    c22store,c23store,c24store,c25store,c26store,c33store,&
+                    c34store,c35store,c36store,c44store,c45store,c46store,&
+                    c55store,c56store,c66store, &
+                    SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+                    NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+                    is_moho_top,is_moho_bot, &
+                    dsdx_top,dsdx_bot, &
+                    ispec2D_moho_top,ispec2D_moho_bot, &
+                    num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+                    phase_ispec_inner_elastic )
+
+  case (7)
+    call compute_forces_elastic_Dev_7p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                    hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+                    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                    kappastore,mustore,jacobian,ibool, &
+                    ATTENUATION, &
+                    one_minus_sum_beta,factor_common, &
+                    alphaval,betaval,gammaval, &
+                    NSPEC_ATTENUATION_AB, &
+                    R_xx,R_yy,R_xy,R_xz,R_yz, &
+                    epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+                    epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+                    ANISOTROPY,NSPEC_ANISO, &
+                    c11store,c12store,c13store,c14store,c15store,c16store,&
+                    c22store,c23store,c24store,c25store,c26store,c33store,&
+                    c34store,c35store,c36store,c44store,c45store,c46store,&
+                    c55store,c56store,c66store, &
+                    SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+                    NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+                    is_moho_top,is_moho_bot, &
+                    dsdx_top,dsdx_bot, &
+                    ispec2D_moho_top,ispec2D_moho_bot, &
+                    num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+                    phase_ispec_inner_elastic )
+
+  case (8)
+    call compute_forces_elastic_Dev_8p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                    hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+                    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                    kappastore,mustore,jacobian,ibool, &
+                    ATTENUATION, &
+                    one_minus_sum_beta,factor_common, &
+                    alphaval,betaval,gammaval, &
+                    NSPEC_ATTENUATION_AB, &
+                    R_xx,R_yy,R_xy,R_xz,R_yz, &
+                    epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+                    epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+                    ANISOTROPY,NSPEC_ANISO, &
+                    c11store,c12store,c13store,c14store,c15store,c16store,&
+                    c22store,c23store,c24store,c25store,c26store,c33store,&
+                    c34store,c35store,c36store,c44store,c45store,c46store,&
+                    c55store,c56store,c66store, &
+                    SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+                    NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+                    is_moho_top,is_moho_bot, &
+                    dsdx_top,dsdx_bot, &
+                    ispec2D_moho_top,ispec2D_moho_bot, &
+                    num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+                    phase_ispec_inner_elastic )
+
+  case (9)
+    call compute_forces_elastic_Dev_9p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                    hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+                    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                    kappastore,mustore,jacobian,ibool, &
+                    ATTENUATION, &
+                    one_minus_sum_beta,factor_common, &
+                    alphaval,betaval,gammaval, &
+                    NSPEC_ATTENUATION_AB, &
+                    R_xx,R_yy,R_xy,R_xz,R_yz, &
+                    epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+                    epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+                    ANISOTROPY,NSPEC_ANISO, &
+                    c11store,c12store,c13store,c14store,c15store,c16store,&
+                    c22store,c23store,c24store,c25store,c26store,c33store,&
+                    c34store,c35store,c36store,c44store,c45store,c46store,&
+                    c55store,c56store,c66store, &
+                    SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+                    NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+                    is_moho_top,is_moho_bot, &
+                    dsdx_top,dsdx_bot, &
+                    ispec2D_moho_top,ispec2D_moho_bot, &
+                    num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+                    phase_ispec_inner_elastic )
+
+  case (10)
+    call compute_forces_elastic_Dev_10p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
+                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                    hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+                    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                    kappastore,mustore,jacobian,ibool, &
+                    ATTENUATION, &
+                    one_minus_sum_beta,factor_common, &
+                    alphaval,betaval,gammaval, &
+                    NSPEC_ATTENUATION_AB, &
+                    R_xx,R_yy,R_xy,R_xz,R_yz, &
+                    epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+                    epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+                    ANISOTROPY,NSPEC_ANISO, &
+                    c11store,c12store,c13store,c14store,c15store,c16store,&
+                    c22store,c23store,c24store,c25store,c26store,c33store,&
+                    c34store,c35store,c36store,c44store,c45store,c46store,&
+                    c55store,c56store,c66store, &
+                    SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+                    NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+                    is_moho_top,is_moho_bot, &
+                    dsdx_top,dsdx_bot, &
+                    ispec2D_moho_top,ispec2D_moho_bot, &
+                    num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+                    phase_ispec_inner_elastic )
+
+  case default
+
+    stop 'error no Deville routine available for chosen NGLLX'
+
+  end select
+
+end subroutine compute_forces_elastic_Dev_sim1
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+subroutine compute_forces_elastic_Dev_sim3(iphase)
+
+! uses backward/reconstructed displacement and acceleration arrays
+
+  use specfem_par
+  use specfem_par_acoustic
+  use specfem_par_elastic
+  use specfem_par_poroelastic
+
+  implicit none
+
+  integer,intent(in) :: iphase
+
+  select case(NGLLX)
+
+  case (5)
+    call compute_forces_elastic_Dev_5p(iphase, NSPEC_AB,NGLOB_AB, &
+                  b_displ,b_accel, &
+                  xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                  hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+                  wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                  kappastore,mustore,jacobian,ibool, &
+                  ATTENUATION, &
+                  one_minus_sum_beta,factor_common, &
+                  b_alphaval,b_betaval,b_gammaval, &
+                  NSPEC_ATTENUATION_AB, &
+                  b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+                  b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
+                  ANISOTROPY,NSPEC_ANISO, &
+                  c11store,c12store,c13store,c14store,c15store,c16store,&
+                  c22store,c23store,c24store,c25store,c26store,c33store,&
+                  c34store,c35store,c36store,c44store,c45store,c46store,&
+                  c55store,c56store,c66store, &
+                  SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
+                  NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+                  is_moho_top,is_moho_bot, &
+                  b_dsdx_top,b_dsdx_bot, &
+                  ispec2D_moho_top,ispec2D_moho_bot, &
+                  num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+                  phase_ispec_inner_elastic )
+
+  case (6)
+    call compute_forces_elastic_Dev_6p(iphase, NSPEC_AB,NGLOB_AB, &
+                  b_displ,b_accel, &
+                  xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                  hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+                  wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                  kappastore,mustore,jacobian,ibool, &
+                  ATTENUATION, &
+                  one_minus_sum_beta,factor_common, &
+                  b_alphaval,b_betaval,b_gammaval, &
+                  NSPEC_ATTENUATION_AB, &
+                  b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+                  b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
+                  ANISOTROPY,NSPEC_ANISO, &
+                  c11store,c12store,c13store,c14store,c15store,c16store,&
+                  c22store,c23store,c24store,c25store,c26store,c33store,&
+                  c34store,c35store,c36store,c44store,c45store,c46store,&
+                  c55store,c56store,c66store, &
+                  SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
+                  NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+                  is_moho_top,is_moho_bot, &
+                  b_dsdx_top,b_dsdx_bot, &
+                  ispec2D_moho_top,ispec2D_moho_bot, &
+                  num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+                  phase_ispec_inner_elastic )
+
+  case (7)
+    call compute_forces_elastic_Dev_7p(iphase, NSPEC_AB,NGLOB_AB, &
+                  b_displ,b_accel, &
+                  xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                  hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+                  wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                  kappastore,mustore,jacobian,ibool, &
+                  ATTENUATION, &
+                  one_minus_sum_beta,factor_common, &
+                  b_alphaval,b_betaval,b_gammaval, &
+                  NSPEC_ATTENUATION_AB, &
+                  b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+                  b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
+                  ANISOTROPY,NSPEC_ANISO, &
+                  c11store,c12store,c13store,c14store,c15store,c16store,&
+                  c22store,c23store,c24store,c25store,c26store,c33store,&
+                  c34store,c35store,c36store,c44store,c45store,c46store,&
+                  c55store,c56store,c66store, &
+                  SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
+                  NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+                  is_moho_top,is_moho_bot, &
+                  b_dsdx_top,b_dsdx_bot, &
+                  ispec2D_moho_top,ispec2D_moho_bot, &
+                  num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+                  phase_ispec_inner_elastic )
+
+  case (8)
+    call compute_forces_elastic_Dev_8p(iphase, NSPEC_AB,NGLOB_AB, &
+                  b_displ,b_accel, &
+                  xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                  hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+                  wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                  kappastore,mustore,jacobian,ibool, &
+                  ATTENUATION, &
+                  one_minus_sum_beta,factor_common, &
+                  b_alphaval,b_betaval,b_gammaval, &
+                  NSPEC_ATTENUATION_AB, &
+                  b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+                  b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
+                  ANISOTROPY,NSPEC_ANISO, &
+                  c11store,c12store,c13store,c14store,c15store,c16store,&
+                  c22store,c23store,c24store,c25store,c26store,c33store,&
+                  c34store,c35store,c36store,c44store,c45store,c46store,&
+                  c55store,c56store,c66store, &
+                  SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
+                  NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+                  is_moho_top,is_moho_bot, &
+                  b_dsdx_top,b_dsdx_bot, &
+                  ispec2D_moho_top,ispec2D_moho_bot, &
+                  num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+                  phase_ispec_inner_elastic )
+
+  case (9)
+    call compute_forces_elastic_Dev_9p(iphase, NSPEC_AB,NGLOB_AB, &
+                  b_displ,b_accel, &
+                  xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                  hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+                  wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                  kappastore,mustore,jacobian,ibool, &
+                  ATTENUATION, &
+                  one_minus_sum_beta,factor_common, &
+                  b_alphaval,b_betaval,b_gammaval, &
+                  NSPEC_ATTENUATION_AB, &
+                  b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+                  b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
+                  ANISOTROPY,NSPEC_ANISO, &
+                  c11store,c12store,c13store,c14store,c15store,c16store,&
+                  c22store,c23store,c24store,c25store,c26store,c33store,&
+                  c34store,c35store,c36store,c44store,c45store,c46store,&
+                  c55store,c56store,c66store, &
+                  SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
+                  NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+                  is_moho_top,is_moho_bot, &
+                  b_dsdx_top,b_dsdx_bot, &
+                  ispec2D_moho_top,ispec2D_moho_bot, &
+                  num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+                  phase_ispec_inner_elastic )
+
+  case (10)
+    call compute_forces_elastic_Dev_10p(iphase, NSPEC_AB,NGLOB_AB, &
+                  b_displ,b_accel, &
+                  xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                  hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+                  wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                  kappastore,mustore,jacobian,ibool, &
+                  ATTENUATION, &
+                  one_minus_sum_beta,factor_common, &
+                  b_alphaval,b_betaval,b_gammaval, &
+                  NSPEC_ATTENUATION_AB, &
+                  b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+                  b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
+                  ANISOTROPY,NSPEC_ANISO, &
+                  c11store,c12store,c13store,c14store,c15store,c16store,&
+                  c22store,c23store,c24store,c25store,c26store,c33store,&
+                  c34store,c35store,c36store,c44store,c45store,c46store,&
+                  c55store,c56store,c66store, &
+                  SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
+                  NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+                  is_moho_top,is_moho_bot, &
+                  b_dsdx_top,b_dsdx_bot, &
+                  ispec2D_moho_top,ispec2D_moho_bot, &
+                  num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+                  phase_ispec_inner_elastic )
+
+  case default
+
+    stop 'error no Deville routine available for chosen NGLLX'
+
+  end select
+
+
+end subroutine compute_forces_elastic_Dev_sim3
+
+

Deleted: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.f90	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic.f90	2012-02-25 01:37:33 UTC (rev 19680)
@@ -1,857 +0,0 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-! elastic solver
-
-subroutine compute_forces_elastic()
-
-  use specfem_par
-  use specfem_par_acoustic
-  use specfem_par_elastic
-  use specfem_par_poroelastic
-
-  implicit none
-
-  integer:: iphase
-  logical:: phase_is_inner
-
-! distinguishes two runs: for points on MPI interfaces, and points within the partitions
-  do iphase=1,2
-
-    !first for points on MPI interfaces
-    if( iphase == 1 ) then
-      phase_is_inner = .false.
-    else
-      phase_is_inner = .true.
-    endif
-
-
-! elastic term
-    if( .NOT. GPU_MODE ) then
-      if(USE_DEVILLE_PRODUCTS) then
-        ! uses Deville (2002) optimizations
-        call compute_forces_elastic_Dev_sim1(iphase)
-
-        ! adjoint simulations: backward/reconstructed wavefield
-        if( SIMULATION_TYPE == 3 ) &
-          call compute_forces_elastic_Dev_sim3(iphase)
-
-      else
-        ! no optimizations used
-        call compute_forces_elastic_noDev( iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
-                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        hprime_xx,hprime_yy,hprime_zz, &
-                        hprimewgll_xx,hprimewgll_yy,hprimewgll_zz,&
-                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                        kappastore,mustore,jacobian,ibool, &
-                        ATTENUATION,&
-                        one_minus_sum_beta,factor_common, &
-                        alphaval,betaval,gammaval,&
-                        NSPEC_ATTENUATION_AB, &
-                        R_xx,R_yy,R_xy,R_xz,R_yz, &
-                        epsilondev_xx,epsilondev_yy,epsilondev_xy,&
-                        epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-                        ANISOTROPY,NSPEC_ANISO, &
-                        c11store,c12store,c13store,c14store,c15store,c16store,&
-                        c22store,c23store,c24store,c25store,c26store,c33store,&
-                        c34store,c35store,c36store,c44store,c45store,c46store,&
-                        c55store,c56store,c66store, &
-                        SIMULATION_TYPE,COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-                        NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                        is_moho_top,is_moho_bot, &
-                        dsdx_top,dsdx_bot, &
-                        ispec2D_moho_top,ispec2D_moho_bot, &
-                        num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                        phase_ispec_inner_elastic  )
-
-        ! adjoint simulations: backward/reconstructed wavefield
-        if( SIMULATION_TYPE == 3 ) &
-          call compute_forces_elastic_noDev( iphase, NSPEC_AB,NGLOB_AB,&
-                        b_displ,b_accel, &
-                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        hprime_xx,hprime_yy,hprime_zz, &
-                        hprimewgll_xx,hprimewgll_yy,hprimewgll_zz,&
-                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                        kappastore,mustore,jacobian,ibool, &
-                        ATTENUATION,&
-                        one_minus_sum_beta,factor_common, &
-                        b_alphaval,b_betaval,b_gammaval,&
-                        NSPEC_ATTENUATION_AB, &
-                        b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
-                        b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy,&
-                        b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
-                        ANISOTROPY,NSPEC_ANISO, &
-                        c11store,c12store,c13store,c14store,c15store,c16store,&
-                        c22store,c23store,c24store,c25store,c26store,c33store,&
-                        c34store,c35store,c36store,c44store,c45store,c46store,&
-                        c55store,c56store,c66store, &
-                        SIMULATION_TYPE,COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-                        NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                        is_moho_top,is_moho_bot, &
-                        b_dsdx_top,b_dsdx_bot, &
-                        ispec2D_moho_top,ispec2D_moho_bot, &
-                        num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                        phase_ispec_inner_elastic  )
-
-      endif
-
-    else
-      ! on GPU
-      ! contains both forward SIM_TYPE==1 and backward SIM_TYPE==3 simulations
-      call compute_forces_elastic_cuda(Mesh_pointer, iphase, &
-                                      nspec_outer_elastic, &
-                                      nspec_inner_elastic, &
-                                      SIMULATION_TYPE, &
-                                      COMPUTE_AND_STORE_STRAIN,ATTENUATION,ANISOTROPY)
-    endif ! GPU_MODE
-
-
-! adds elastic absorbing boundary term to acceleration (Stacey conditions)
-    if(ABSORBING_CONDITIONS) &
-      call compute_stacey_elastic(NSPEC_AB,NGLOB_AB,accel, &
-                        ibool,ispec_is_inner,phase_is_inner, &
-                        abs_boundary_normal,abs_boundary_jacobian2Dw, &
-                        abs_boundary_ijk,abs_boundary_ispec, &
-                        num_abs_boundary_faces, &
-                        veloc,rho_vp,rho_vs, &
-                        ispec_is_elastic,SIMULATION_TYPE,SAVE_FORWARD, &
-                        NSTEP,it,NGLOB_ADJOINT,b_accel, &
-                        b_num_abs_boundary_faces,b_reclen_field,b_absorb_field,&
-                        GPU_MODE,Mesh_pointer)
-
-! acoustic coupling
-    if( ACOUSTIC_SIMULATION ) then
-      if( .NOT. GPU_MODE ) then
-        call compute_coupling_elastic_ac(NSPEC_AB,NGLOB_AB, &
-                        ibool,accel,potential_dot_dot_acoustic, &
-                        num_coupling_ac_el_faces, &
-                        coupling_ac_el_ispec,coupling_ac_el_ijk, &
-                        coupling_ac_el_normal, &
-                        coupling_ac_el_jacobian2Dw, &
-                        ispec_is_inner,phase_is_inner)
-
-        ! adjoint simulations
-        if( SIMULATION_TYPE == 3 ) &
-          call compute_coupling_elastic_ac(NSPEC_ADJOINT,NGLOB_ADJOINT, &
-                        ibool,b_accel,b_potential_dot_dot_acoustic, &
-                        num_coupling_ac_el_faces, &
-                        coupling_ac_el_ispec,coupling_ac_el_ijk, &
-                        coupling_ac_el_normal, &
-                        coupling_ac_el_jacobian2Dw, &
-                        ispec_is_inner,phase_is_inner)
-      else
-        ! on GPU
-        if( num_coupling_ac_el_faces > 0 ) &
-          call compute_coupling_el_ac_cuda(Mesh_pointer,phase_is_inner, &
-                                              num_coupling_ac_el_faces,SIMULATION_TYPE)
-
-      endif
-    endif
-
-
-! poroelastic coupling
-! not implemented yet
-!    if( POROELASTIC_SIMULATION ) &
-!      call compute_coupling_elastic_poro()
-
-! adds source term (single-force/moment-tensor solution)
-    call compute_add_sources_elastic( NSPEC_AB,NGLOB_AB,accel, &
-                        ibool,ispec_is_inner,phase_is_inner, &
-                        NSOURCES,myrank,it,islice_selected_source,ispec_selected_source,&
-                        xi_source,eta_source,gamma_source,nu_source, &
-                        hdur,hdur_gaussian,tshift_cmt,dt,t0,sourcearrays, &
-                        ispec_is_elastic,SIMULATION_TYPE,NSTEP,NGLOB_ADJOINT, &
-                        nrec,islice_selected_rec,ispec_selected_rec, &
-                        nadj_rec_local,adj_sourcearrays,b_accel, &
-                        NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY, &
-                        GPU_MODE, Mesh_pointer )
-
-    ! assemble all the contributions between slices using MPI
-    if( phase_is_inner .eqv. .false. ) then
-       ! sends accel values to corresponding MPI interface neighbors
-       if(.NOT. GPU_MODE) then
-          call assemble_MPI_vector_ext_mesh_s(NPROC,NGLOB_AB,accel, &
-               buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
-               num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
-               nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
-               my_neighbours_ext_mesh, &
-               request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
-       else ! GPU_MODE==1
-          call transfer_boun_accel_from_device(NGLOB_AB*NDIM, Mesh_pointer, accel,&
-                        buffer_send_vector_ext_mesh,&
-                        num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh,&
-                        nibool_interfaces_ext_mesh, ibool_interfaces_ext_mesh,1) ! <-- 1 == fwd accel
-          call assemble_MPI_vector_send_cuda(NPROC, &
-               buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
-               num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
-               nibool_interfaces_ext_mesh,&
-               my_neighbours_ext_mesh, &
-               request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
-       endif ! GPU_MODE
-
-       ! adjoint simulations
-       if( SIMULATION_TYPE == 3 ) then
-          if(.NOT. GPU_MODE) then
-             call assemble_MPI_vector_ext_mesh_s(NPROC,NGLOB_ADJOINT,b_accel, &
-                  b_buffer_send_vector_ext_mesh,b_buffer_recv_vector_ext_mesh, &
-                  num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
-                  nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
-                  my_neighbours_ext_mesh, &
-                  b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh)
-          else ! GPU_MODE == 1
-             call transfer_boun_accel_from_device(NGLOB_AB*NDIM, Mesh_pointer, b_accel,&
-                       b_buffer_send_vector_ext_mesh,&
-                       num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh,&
-                       nibool_interfaces_ext_mesh, ibool_interfaces_ext_mesh,3) ! <-- 3 == adjoint b_accel
-             call assemble_MPI_vector_send_cuda(NPROC, &
-                  b_buffer_send_vector_ext_mesh,b_buffer_recv_vector_ext_mesh, &
-                  num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
-                  nibool_interfaces_ext_mesh,&
-                  my_neighbours_ext_mesh, &
-                  b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh)
-
-          endif ! GPU
-       endif !adjoint
-
-    else
-      ! waits for send/receive requests to be completed and assembles values
-      if(.NOT. GPU_MODE) then
-         call assemble_MPI_vector_ext_mesh_w(NPROC,NGLOB_AB,accel, &
-                            buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
-                            max_nibool_interfaces_ext_mesh, &
-                            nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
-                            request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
-      else ! GPU_MODE == 1
-         call assemble_MPI_vector_write_cuda(NPROC,NGLOB_AB,accel, Mesh_pointer,&
-                            buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
-                            max_nibool_interfaces_ext_mesh, &
-                            nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
-                            request_send_vector_ext_mesh,request_recv_vector_ext_mesh,1)
-      endif
-      ! adjoint simulations
-      if( SIMULATION_TYPE == 3 ) then
-         if(.NOT. GPU_MODE) then
-            call assemble_MPI_vector_ext_mesh_w(NPROC,NGLOB_ADJOINT,b_accel, &
-                             b_buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
-                             max_nibool_interfaces_ext_mesh, &
-                             nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
-                             b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh)
-         else ! GPU_MODE == 1
-            call assemble_MPI_vector_write_cuda(NPROC,NGLOB_AB,b_accel, Mesh_pointer,&
-                              b_buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
-                              max_nibool_interfaces_ext_mesh, &
-                              nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
-                              b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh,3)
-         endif
-      endif !adjoint
-
-    endif
-
-    !! DK DK May 2009: removed this because now each slice of a CUBIT + SCOTCH mesh
-    !! DK DK May 2009: has a different number of spectral elements and therefore
-    !! DK DK May 2009: only the general non-blocking MPI routines assemble_MPI_vector_ext_mesh_s
-    !! DK DK May 2009: and assemble_MPI_vector_ext_mesh_w above can be used.
-    !! DK DK May 2009: For adjoint runs below (SIMULATION_TYPE == 3) they should be used as well.
-
- enddo
-
- ! multiplies with inverse of mass matrix (note: rmass has been inverted already)
- if(.NOT. GPU_MODE) then
-    accel(1,:) = accel(1,:)*rmass(:)
-    accel(2,:) = accel(2,:)*rmass(:)
-    accel(3,:) = accel(3,:)*rmass(:)
-    ! adjoint simulations
-    if (SIMULATION_TYPE == 3) then
-       b_accel(1,:) = b_accel(1,:)*rmass(:)
-       b_accel(2,:) = b_accel(2,:)*rmass(:)
-       b_accel(3,:) = b_accel(3,:)*rmass(:)
-    endif !adjoint
- else ! GPU_MODE == 1
-    call kernel_3_a_cuda(Mesh_pointer, NGLOB_AB, deltatover2,SIMULATION_TYPE,b_deltatover2,OCEANS)
- endif
-
-! updates acceleration with ocean load term
-  if(OCEANS) then
-    if( .NOT. GPU_MODE ) then
-      call elastic_ocean_load(NSPEC_AB,NGLOB_AB, &
-                        ibool,rmass,rmass_ocean_load,accel, &
-                        free_surface_normal,free_surface_ijk,free_surface_ispec, &
-                        num_free_surface_faces,SIMULATION_TYPE, &
-                        NGLOB_ADJOINT,b_accel)
-    else
-      ! on GPU
-      call elastic_ocean_load_cuda(Mesh_pointer,SIMULATION_TYPE)
-    endif
-  endif
-
-! updates velocities
-! Newmark finite-difference time scheme with elastic domains:
-! (see e.g. Hughes, 1987; Chaljub et al., 2003)
-!
-! u(t+delta_t) = u(t) + delta_t  v(t) + 1/2  delta_t**2 a(t)
-! v(t+delta_t) = v(t) + 1/2 delta_t a(t) + 1/2 delta_t a(t+delta_t)
-! a(t+delta_t) = 1/M_elastic ( -K_elastic u(t+delta) + B_elastic chi_dot_dot(t+delta_t) + f( t+delta_t) )
-!
-! where
-!   u, v, a are displacement,velocity & acceleration
-!   M is mass matrix, K stiffness matrix and B boundary term for acoustic/elastic domains
-!   f denotes a source term (acoustic/elastic)
-!   chi_dot_dot is acoustic (fluid) potential ( dotted twice with respect to time)
-!
-! corrector:
-!   updates the velocity term which requires a(t+delta)
-! GPU_MODE: this is handled in 'kernel_3' at the same time as accel*rmass
-  if(.NOT. GPU_MODE) then
-     veloc(:,:) = veloc(:,:) + deltatover2*accel(:,:)
-     ! adjoint simulations
-     if (SIMULATION_TYPE == 3) b_veloc(:,:) = b_veloc(:,:) + b_deltatover2*b_accel(:,:)
-  else ! GPU_MODE == 1
-    if( OCEANS ) call kernel_3_b_cuda(Mesh_pointer, NGLOB_AB, deltatover2,SIMULATION_TYPE,b_deltatover2)
-  endif
-
-
-end subroutine compute_forces_elastic
-
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-subroutine elastic_ocean_load(NSPEC_AB,NGLOB_AB, &
-                        ibool,rmass,rmass_ocean_load,accel, &
-                        free_surface_normal,free_surface_ijk,free_surface_ispec, &
-                        num_free_surface_faces,SIMULATION_TYPE, &
-                        NGLOB_ADJOINT,b_accel)
-
-! updates acceleration with ocean load term:
-! approximates ocean-bottom continuity of pressure & displacement for longer period waves (> ~20s ),
-! assuming incompressible fluid column above bathymetry ocean bottom
-
-  implicit none
-
-  include 'constants.h'
-
-  integer :: NSPEC_AB,NGLOB_AB
-
-  real(kind=CUSTOM_REAL),dimension(NDIM,NGLOB_AB),intent(inout) :: accel
-  real(kind=CUSTOM_REAL),dimension(NGLOB_AB),intent(in) :: rmass,rmass_ocean_load
-
-  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB),intent(in) :: ibool
-
-  ! free surface
-  integer :: num_free_surface_faces
-  real(kind=CUSTOM_REAL) :: free_surface_normal(NDIM,NGLLSQUARE,num_free_surface_faces)
-  integer :: free_surface_ijk(3,NGLLSQUARE,num_free_surface_faces)
-  integer :: free_surface_ispec(num_free_surface_faces)
-
-  ! adjoint simulations
-  integer :: SIMULATION_TYPE,NGLOB_ADJOINT
-  real(kind=CUSTOM_REAL),dimension(NDIM,NGLOB_ADJOINT):: b_accel
-
-! local parameters
-  real(kind=CUSTOM_REAL) :: nx,ny,nz
-  real(kind=CUSTOM_REAL) :: additional_term,force_normal_comp
-  integer :: i,j,k,ispec,iglob
-  integer :: igll,iface
-  logical,dimension(NGLOB_AB) :: updated_dof_ocean_load
-  ! adjoint locals
-  real(kind=CUSTOM_REAL) :: b_additional_term,b_force_normal_comp
-
-  !   initialize the updates
-  updated_dof_ocean_load(:) = .false.
-
-  ! for surface elements exactly at the top of the model (ocean bottom)
-  do iface = 1,num_free_surface_faces
-
-    ispec = free_surface_ispec(iface)
-    do igll = 1, NGLLSQUARE
-      i = free_surface_ijk(1,igll,iface)
-      j = free_surface_ijk(2,igll,iface)
-      k = free_surface_ijk(3,igll,iface)
-
-      ! get global point number
-      iglob = ibool(i,j,k,ispec)
-
-      ! only update once
-      if(.not. updated_dof_ocean_load(iglob)) then
-
-        ! get normal
-        nx = free_surface_normal(1,igll,iface)
-        ny = free_surface_normal(2,igll,iface)
-        nz = free_surface_normal(3,igll,iface)
-
-        ! make updated component of right-hand side
-        ! we divide by rmass() which is 1 / M
-        ! we use the total force which includes the Coriolis term above
-        force_normal_comp = ( accel(1,iglob)*nx + &
-                              accel(2,iglob)*ny + &
-                              accel(3,iglob)*nz ) / rmass(iglob)
-
-        additional_term = (rmass_ocean_load(iglob) - rmass(iglob)) * force_normal_comp
-
-        accel(1,iglob) = accel(1,iglob) + additional_term * nx
-        accel(2,iglob) = accel(2,iglob) + additional_term * ny
-        accel(3,iglob) = accel(3,iglob) + additional_term * nz
-
-        ! adjoint simulations
-        if (SIMULATION_TYPE == 3) then
-          b_force_normal_comp = ( b_accel(1,iglob)*nx + &
-                                  b_accel(2,iglob)*ny + &
-                                  b_accel(3,iglob)*nz) / rmass(iglob)
-          b_additional_term = (rmass_ocean_load(iglob) - rmass(iglob)) * b_force_normal_comp
-
-          b_accel(1,iglob) = b_accel(1,iglob) + b_additional_term * nx
-          b_accel(2,iglob) = b_accel(2,iglob) + b_additional_term * ny
-          b_accel(3,iglob) = b_accel(3,iglob) + b_additional_term * nz
-        endif !adjoint
-
-        ! done with this point
-        updated_dof_ocean_load(iglob) = .true.
-
-      endif
-
-    enddo ! igll
-  enddo ! iface
-
-end subroutine elastic_ocean_load
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-! distributes routines according to chosen NGLLX in constants.h
-
-!daniel: note -- i put it here rather than in compute_forces_elastic_Dev.f90 because compiler complains that:
-! " The storage extent of the dummy argument exceeds that of the actual argument. "
-
-subroutine compute_forces_elastic_Dev_sim1(iphase)
-
-! forward simulations
-
-  use specfem_par
-  use specfem_par_acoustic
-  use specfem_par_elastic
-  use specfem_par_poroelastic
-  
-  implicit none
-
-  integer,intent(in) :: iphase
-  logical OPENMP_MODE
-  OPENMP_MODE = .false.
-
-  ! write(*,*) "num_elem_colors_elastic(1)=",num_elem_colors_elastic(1)
-  
-  select case(NGLLX)
-     
-  case (5)
-     if(OPENMP_MODE) then
-        call compute_forces_elastic_Dev_openmp(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
-             xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-             hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-             wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-             kappastore,mustore,jacobian,ibool, &
-             ATTENUATION, &
-             one_minus_sum_beta,factor_common, &
-             alphaval,betaval,gammaval, &
-             NSPEC_ATTENUATION_AB, &
-             R_xx,R_yy,R_xy,R_xz,R_yz, &
-             epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-             epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-             ANISOTROPY,NSPEC_ANISO, &
-             c11store,c12store,c13store,c14store,c15store,c16store,&
-             c22store,c23store,c24store,c25store,c26store,c33store,&
-             c34store,c35store,c36store,c44store,c45store,c46store,&
-             c55store,c56store,c66store, &
-             SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-             NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-             is_moho_top,is_moho_bot, &
-             dsdx_top,dsdx_bot, &
-             ispec2D_moho_top,ispec2D_moho_bot, &
-             num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-             phase_ispec_inner_elastic,&
-             num_colors_outer_elastic,num_colors_inner_elastic,&
-             num_elem_colors_elastic,&
-             dummyx_loc,dummyy_loc,dummyz_loc,newtempx1,newtempx2,newtempx3,&
-             newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3,&
-             tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3)
-     else
-
-        call compute_forces_elastic_Dev_5p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
-             xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-             hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-             wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-             kappastore,mustore,jacobian,ibool, &
-             ATTENUATION, &
-             one_minus_sum_beta,factor_common, &
-             alphaval,betaval,gammaval, &
-             NSPEC_ATTENUATION_AB, &
-             R_xx,R_yy,R_xy,R_xz,R_yz, &
-             epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-             epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-             ANISOTROPY,NSPEC_ANISO, &
-             c11store,c12store,c13store,c14store,c15store,c16store,&
-             c22store,c23store,c24store,c25store,c26store,c33store,&
-             c34store,c35store,c36store,c44store,c45store,c46store,&
-             c55store,c56store,c66store, &
-             SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-             NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-             is_moho_top,is_moho_bot, &
-             dsdx_top,dsdx_bot, &
-             ispec2D_moho_top,ispec2D_moho_bot, &
-             num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-             phase_ispec_inner_elastic )
-     endif
-
-  case (6)
-    call compute_forces_elastic_Dev_6p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
-                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                    hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                    kappastore,mustore,jacobian,ibool, &
-                    ATTENUATION, &
-                    one_minus_sum_beta,factor_common, &
-                    alphaval,betaval,gammaval, &
-                    NSPEC_ATTENUATION_AB, &
-                    R_xx,R_yy,R_xy,R_xz,R_yz, &
-                    epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-                    epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-                    ANISOTROPY,NSPEC_ANISO, &
-                    c11store,c12store,c13store,c14store,c15store,c16store,&
-                    c22store,c23store,c24store,c25store,c26store,c33store,&
-                    c34store,c35store,c36store,c44store,c45store,c46store,&
-                    c55store,c56store,c66store, &
-                    SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-                    NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                    is_moho_top,is_moho_bot, &
-                    dsdx_top,dsdx_bot, &
-                    ispec2D_moho_top,ispec2D_moho_bot, &
-                    num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                    phase_ispec_inner_elastic )
-
-  case (7)
-    call compute_forces_elastic_Dev_7p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
-                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                    hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                    kappastore,mustore,jacobian,ibool, &
-                    ATTENUATION, &
-                    one_minus_sum_beta,factor_common, &
-                    alphaval,betaval,gammaval, &
-                    NSPEC_ATTENUATION_AB, &
-                    R_xx,R_yy,R_xy,R_xz,R_yz, &
-                    epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-                    epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-                    ANISOTROPY,NSPEC_ANISO, &
-                    c11store,c12store,c13store,c14store,c15store,c16store,&
-                    c22store,c23store,c24store,c25store,c26store,c33store,&
-                    c34store,c35store,c36store,c44store,c45store,c46store,&
-                    c55store,c56store,c66store, &
-                    SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-                    NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                    is_moho_top,is_moho_bot, &
-                    dsdx_top,dsdx_bot, &
-                    ispec2D_moho_top,ispec2D_moho_bot, &
-                    num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                    phase_ispec_inner_elastic )
-
-  case (8)
-    call compute_forces_elastic_Dev_8p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
-                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                    hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                    kappastore,mustore,jacobian,ibool, &
-                    ATTENUATION, &
-                    one_minus_sum_beta,factor_common, &
-                    alphaval,betaval,gammaval, &
-                    NSPEC_ATTENUATION_AB, &
-                    R_xx,R_yy,R_xy,R_xz,R_yz, &
-                    epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-                    epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-                    ANISOTROPY,NSPEC_ANISO, &
-                    c11store,c12store,c13store,c14store,c15store,c16store,&
-                    c22store,c23store,c24store,c25store,c26store,c33store,&
-                    c34store,c35store,c36store,c44store,c45store,c46store,&
-                    c55store,c56store,c66store, &
-                    SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-                    NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                    is_moho_top,is_moho_bot, &
-                    dsdx_top,dsdx_bot, &
-                    ispec2D_moho_top,ispec2D_moho_bot, &
-                    num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                    phase_ispec_inner_elastic )
-
-  case (9)
-    call compute_forces_elastic_Dev_9p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
-                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                    hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                    kappastore,mustore,jacobian,ibool, &
-                    ATTENUATION, &
-                    one_minus_sum_beta,factor_common, &
-                    alphaval,betaval,gammaval, &
-                    NSPEC_ATTENUATION_AB, &
-                    R_xx,R_yy,R_xy,R_xz,R_yz, &
-                    epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-                    epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-                    ANISOTROPY,NSPEC_ANISO, &
-                    c11store,c12store,c13store,c14store,c15store,c16store,&
-                    c22store,c23store,c24store,c25store,c26store,c33store,&
-                    c34store,c35store,c36store,c44store,c45store,c46store,&
-                    c55store,c56store,c66store, &
-                    SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-                    NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                    is_moho_top,is_moho_bot, &
-                    dsdx_top,dsdx_bot, &
-                    ispec2D_moho_top,ispec2D_moho_bot, &
-                    num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                    phase_ispec_inner_elastic )
-
-  case (10)
-    call compute_forces_elastic_Dev_10p(iphase, NSPEC_AB,NGLOB_AB,displ,accel, &
-                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                    hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                    kappastore,mustore,jacobian,ibool, &
-                    ATTENUATION, &
-                    one_minus_sum_beta,factor_common, &
-                    alphaval,betaval,gammaval, &
-                    NSPEC_ATTENUATION_AB, &
-                    R_xx,R_yy,R_xy,R_xz,R_yz, &
-                    epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-                    epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-                    ANISOTROPY,NSPEC_ANISO, &
-                    c11store,c12store,c13store,c14store,c15store,c16store,&
-                    c22store,c23store,c24store,c25store,c26store,c33store,&
-                    c34store,c35store,c36store,c44store,c45store,c46store,&
-                    c55store,c56store,c66store, &
-                    SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-                    NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                    is_moho_top,is_moho_bot, &
-                    dsdx_top,dsdx_bot, &
-                    ispec2D_moho_top,ispec2D_moho_bot, &
-                    num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                    phase_ispec_inner_elastic )
-
-  case default
-
-    stop 'error no Deville routine available for chosen NGLLX'
-
-  end select
-
-end subroutine compute_forces_elastic_Dev_sim1
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-
-subroutine compute_forces_elastic_Dev_sim3(iphase)
-
-! uses backward/reconstructed displacement and acceleration arrays
-
-  use specfem_par
-  use specfem_par_acoustic
-  use specfem_par_elastic
-  use specfem_par_poroelastic
-
-  implicit none
-
-  integer,intent(in) :: iphase
-
-  select case(NGLLX)
-
-  case (5)
-    call compute_forces_elastic_Dev_5p(iphase, NSPEC_AB,NGLOB_AB, &
-                  b_displ,b_accel, &
-                  xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                  hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                  wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                  kappastore,mustore,jacobian,ibool, &
-                  ATTENUATION, &
-                  one_minus_sum_beta,factor_common, &
-                  b_alphaval,b_betaval,b_gammaval, &
-                  NSPEC_ATTENUATION_AB, &
-                  b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
-                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
-                  b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
-                  ANISOTROPY,NSPEC_ANISO, &
-                  c11store,c12store,c13store,c14store,c15store,c16store,&
-                  c22store,c23store,c24store,c25store,c26store,c33store,&
-                  c34store,c35store,c36store,c44store,c45store,c46store,&
-                  c55store,c56store,c66store, &
-                  SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
-                  NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                  is_moho_top,is_moho_bot, &
-                  b_dsdx_top,b_dsdx_bot, &
-                  ispec2D_moho_top,ispec2D_moho_bot, &
-                  num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                  phase_ispec_inner_elastic )
-
-  case (6)
-    call compute_forces_elastic_Dev_6p(iphase, NSPEC_AB,NGLOB_AB, &
-                  b_displ,b_accel, &
-                  xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                  hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                  wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                  kappastore,mustore,jacobian,ibool, &
-                  ATTENUATION, &
-                  one_minus_sum_beta,factor_common, &
-                  b_alphaval,b_betaval,b_gammaval, &
-                  NSPEC_ATTENUATION_AB, &
-                  b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
-                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
-                  b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
-                  ANISOTROPY,NSPEC_ANISO, &
-                  c11store,c12store,c13store,c14store,c15store,c16store,&
-                  c22store,c23store,c24store,c25store,c26store,c33store,&
-                  c34store,c35store,c36store,c44store,c45store,c46store,&
-                  c55store,c56store,c66store, &
-                  SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
-                  NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                  is_moho_top,is_moho_bot, &
-                  b_dsdx_top,b_dsdx_bot, &
-                  ispec2D_moho_top,ispec2D_moho_bot, &
-                  num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                  phase_ispec_inner_elastic )
-
-  case (7)
-    call compute_forces_elastic_Dev_7p(iphase, NSPEC_AB,NGLOB_AB, &
-                  b_displ,b_accel, &
-                  xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                  hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                  wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                  kappastore,mustore,jacobian,ibool, &
-                  ATTENUATION, &
-                  one_minus_sum_beta,factor_common, &
-                  b_alphaval,b_betaval,b_gammaval, &
-                  NSPEC_ATTENUATION_AB, &
-                  b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
-                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
-                  b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
-                  ANISOTROPY,NSPEC_ANISO, &
-                  c11store,c12store,c13store,c14store,c15store,c16store,&
-                  c22store,c23store,c24store,c25store,c26store,c33store,&
-                  c34store,c35store,c36store,c44store,c45store,c46store,&
-                  c55store,c56store,c66store, &
-                  SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
-                  NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                  is_moho_top,is_moho_bot, &
-                  b_dsdx_top,b_dsdx_bot, &
-                  ispec2D_moho_top,ispec2D_moho_bot, &
-                  num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                  phase_ispec_inner_elastic )
-
-  case (8)
-    call compute_forces_elastic_Dev_8p(iphase, NSPEC_AB,NGLOB_AB, &
-                  b_displ,b_accel, &
-                  xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                  hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                  wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                  kappastore,mustore,jacobian,ibool, &
-                  ATTENUATION, &
-                  one_minus_sum_beta,factor_common, &
-                  b_alphaval,b_betaval,b_gammaval, &
-                  NSPEC_ATTENUATION_AB, &
-                  b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
-                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
-                  b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
-                  ANISOTROPY,NSPEC_ANISO, &
-                  c11store,c12store,c13store,c14store,c15store,c16store,&
-                  c22store,c23store,c24store,c25store,c26store,c33store,&
-                  c34store,c35store,c36store,c44store,c45store,c46store,&
-                  c55store,c56store,c66store, &
-                  SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
-                  NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                  is_moho_top,is_moho_bot, &
-                  b_dsdx_top,b_dsdx_bot, &
-                  ispec2D_moho_top,ispec2D_moho_bot, &
-                  num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                  phase_ispec_inner_elastic )
-
-  case (9)
-    call compute_forces_elastic_Dev_9p(iphase, NSPEC_AB,NGLOB_AB, &
-                  b_displ,b_accel, &
-                  xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                  hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                  wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                  kappastore,mustore,jacobian,ibool, &
-                  ATTENUATION, &
-                  one_minus_sum_beta,factor_common, &
-                  b_alphaval,b_betaval,b_gammaval, &
-                  NSPEC_ATTENUATION_AB, &
-                  b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
-                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
-                  b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
-                  ANISOTROPY,NSPEC_ANISO, &
-                  c11store,c12store,c13store,c14store,c15store,c16store,&
-                  c22store,c23store,c24store,c25store,c26store,c33store,&
-                  c34store,c35store,c36store,c44store,c45store,c46store,&
-                  c55store,c56store,c66store, &
-                  SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
-                  NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                  is_moho_top,is_moho_bot, &
-                  b_dsdx_top,b_dsdx_bot, &
-                  ispec2D_moho_top,ispec2D_moho_bot, &
-                  num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                  phase_ispec_inner_elastic )
-
-  case (10)
-    call compute_forces_elastic_Dev_10p(iphase, NSPEC_AB,NGLOB_AB, &
-                  b_displ,b_accel, &
-                  xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                  hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                  wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                  kappastore,mustore,jacobian,ibool, &
-                  ATTENUATION, &
-                  one_minus_sum_beta,factor_common, &
-                  b_alphaval,b_betaval,b_gammaval, &
-                  NSPEC_ATTENUATION_AB, &
-                  b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
-                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
-                  b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
-                  ANISOTROPY,NSPEC_ANISO, &
-                  c11store,c12store,c13store,c14store,c15store,c16store,&
-                  c22store,c23store,c24store,c25store,c26store,c33store,&
-                  c34store,c35store,c36store,c44store,c45store,c46store,&
-                  c55store,c56store,c66store, &
-                  SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
-                  NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                  is_moho_top,is_moho_bot, &
-                  b_dsdx_top,b_dsdx_bot, &
-                  ispec2D_moho_top,ispec2D_moho_bot, &
-                  num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                  phase_ispec_inner_elastic )
-
-  case default
-
-    stop 'error no Deville routine available for chosen NGLLX'
-
-  end select
-
-
-end subroutine compute_forces_elastic_Dev_sim3
-
-

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic_Dev_openmp.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic_Dev_openmp.f90	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_forces_elastic_Dev_openmp.f90	2012-02-25 01:37:33 UTC (rev 19680)
@@ -26,32 +26,32 @@
 
 ! OpenMP Threaded variant by Max Rietmann and Olaf Schenk
 
-subroutine compute_forces_elastic_Dev_openmp(iphase ,NSPEC_AB,NGLOB_AB, &
-                                             displ,accel, &
-                                             xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                                             hprime_xx,hprime_xxT, &
-                                             hprimewgll_xx,hprimewgll_xxT, &
-                                             wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                                             kappastore,mustore,jacobian,ibool, &
-                                             ATTENUATION, &
-                                             one_minus_sum_beta,factor_common,alphaval,betaval,gammaval,&
-                                             NSPEC_ATTENUATION_AB, &
-                                             R_xx,R_yy,R_xy,R_xz,R_yz, &
-                                             epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-                                             epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-                                             ANISOTROPY,NSPEC_ANISO, &
-                                             c11store,c12store,c13store,c14store,c15store,c16store,&
-                                             c22store,c23store,c24store,c25store,c26store,c33store,&
-                                             c34store,c35store,c36store,c44store,c45store,c46store,&
-                                             c55store,c56store,c66store, &
-                                             SIMULATION_TYPE,COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-                                             NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT, &
-                                             is_moho_top,is_moho_bot, &
-                                             dsdx_top,dsdx_bot, &
-                                             ispec2D_moho_top,ispec2D_moho_bot, &
-                                             num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                                             phase_ispec_inner_elastic,&
-                                             num_colors_outer_elastic,num_colors_inner_elastic)
+  subroutine compute_forces_elastic_Dev_openmp(iphase ,NSPEC_AB,NGLOB_AB, &
+                             displ,accel, &
+                             xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                             hprime_xx,hprime_xxT, &
+                             hprimewgll_xx,hprimewgll_xxT, &
+                             wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                             kappastore,mustore,jacobian,ibool, &
+                             ATTENUATION, &
+                             one_minus_sum_beta,factor_common,alphaval,betaval,gammaval,&
+                             NSPEC_ATTENUATION_AB, &
+                             R_xx,R_yy,R_xy,R_xz,R_yz, &
+                             epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+                             epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+                             ANISOTROPY,NSPEC_ANISO, &
+                             c11store,c12store,c13store,c14store,c15store,c16store,&
+                             c22store,c23store,c24store,c25store,c26store,c33store,&
+                             c34store,c35store,c36store,c44store,c45store,c46store,&
+                             c55store,c56store,c66store, &
+                             SIMULATION_TYPE,COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+                             NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT, &
+                             is_moho_top,is_moho_bot, &
+                             dsdx_top,dsdx_bot, &
+                             ispec2D_moho_top,ispec2D_moho_bot, &
+                             num_phase_ispec_elastic,&
+                             phase_ispec_inner_elastic,&
+                             num_colors_outer_elastic,num_colors_inner_elastic)
 
 
 
@@ -63,7 +63,7 @@
 
   ! Trying to pass these variables as subroutine arguments ran into
   ! problems, so we reference them from their module, making them
-  ! accessible from this subroutine 
+  ! accessible from this subroutine
   use specfem_par_elastic, only:dummyx_loc,dummyy_loc,dummyz_loc,newtempx1,newtempx2,newtempx3,&
        newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3,&
        tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3,num_elem_colors_elastic
@@ -115,7 +115,7 @@
        c55store,c56store,c66store
 
   integer :: iphase
-  integer :: num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic
+  integer :: num_phase_ispec_elastic
   integer, dimension(num_phase_ispec_elastic,2) :: phase_ispec_inner_elastic
 
   ! adjoint simulations
@@ -151,12 +151,14 @@
 
   integer OMP_get_thread_num
   integer OMP_GET_MAX_THREADS
-  double precision omp_get_wtime
-  double precision start_time
-  double precision end_time
-  double precision accumulate_time_start
-  double precision accumulate_time_stop
 
+  ! timing
+  !double precision omp_get_wtime
+  !double precision start_time
+  !double precision end_time
+  !double precision accumulate_time_start
+  !double precision accumulate_time_stop
+
   ! local anisotropy parameters
   real(kind=CUSTOM_REAL) c11,c12,c13,c14,c15,c16,c22,c23,c24,c25,c26,&
        c33,c34,c35,c36,c44,c45,c46,c55,c56,c66
@@ -166,7 +168,7 @@
   integer i,j,k
   integer thread_id
   integer NUM_THREADS
-  integer omp_get_num_threads ! function
+  !integer omp_get_num_threads ! function
 
   ! coloring additions
   ! integer, dimension(:), allocatable :: num_elem_colors_elastic
@@ -174,8 +176,9 @@
   integer num_colors_outer_elastic, num_colors_inner_elastic
   integer icolor
 
-  ! write(*,*) "num_elem_colors_elastic(1)=",num_elem_colors_elastic(1)
+  ! write(*,*) "num_elem_colors_elastic(1) = ",num_elem_colors_elastic(1)
   imodulo_N_SLS = mod(N_SLS,3)
+
   ! NUM_THREADS = 1
   NUM_THREADS = OMP_GET_MAX_THREADS()
 
@@ -188,8 +191,8 @@
      number_of_colors = num_colors_inner_elastic + num_colors_outer_elastic
      istart = num_colors_outer_elastic+1
      ! istart = num_colors_outer_elastic
+  endif
 
-  endif
   ! "start" timer
   ! start_time = omp_get_wtime()
 
@@ -201,8 +204,8 @@
   ! order, stopping to synchronize threads after all the elements in a
   ! color are finished.
   estart = 1
- do icolor = istart, number_of_colors
-    
+  do icolor = istart, number_of_colors
+
     !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(&
     !$OMP R_xx_val1,R_yy_val1,R_xx_val2,R_yy_val2,R_xx_val3,R_yy_val3,&
     !$OMP factor_loc,alphaval_loc,betaval_loc,gammaval_loc,&
@@ -647,11 +650,20 @@
 
                 ! Assembly of shared degrees of freedom fixed through mesh coloring
                 !! !$OMP ATOMIC
-                accel(1,iglob) = accel(1,iglob) - (fac1*newtempx1(i,j,k,thread_id) + fac2*newtempx2(i,j,k,thread_id) + fac3*newtempx3(i,j,k,thread_id))
+                accel(1,iglob) = accel(1,iglob) &
+                                  - (fac1*newtempx1(i,j,k,thread_id) &
+                                   + fac2*newtempx2(i,j,k,thread_id) &
+                                   + fac3*newtempx3(i,j,k,thread_id))
                 !! !$OMP ATOMIC
-                accel(2,iglob) = accel(2,iglob) - (fac1*newtempy1(i,j,k,thread_id) + fac2*newtempy2(i,j,k,thread_id) + fac3*newtempy3(i,j,k,thread_id))
+                accel(2,iglob) = accel(2,iglob) &
+                                  - (fac1*newtempy1(i,j,k,thread_id) &
+                                   + fac2*newtempy2(i,j,k,thread_id) &
+                                   + fac3*newtempy3(i,j,k,thread_id))
                 !! !$OMP ATOMIC
-                accel(3,iglob) = accel(3,iglob) - (fac1*newtempz1(i,j,k,thread_id) + fac2*newtempz2(i,j,k,thread_id) + fac3*newtempz3(i,j,k,thread_id))
+                accel(3,iglob) = accel(3,iglob) &
+                                  - (fac1*newtempz1(i,j,k,thread_id) &
+                                   + fac2*newtempz2(i,j,k,thread_id) &
+                                   + fac3*newtempz3(i,j,k,thread_id))
 
                 ! accel(1,iglob) = accel(1,iglob) - &
                 ! (fac1*newtempx1(i,j,k,thread_id) + fac2*newtempx2(i,j,k,thread_id) + fac3*newtempx3(i,j,k,thread_id))
@@ -732,20 +744,19 @@
     ! color.
     estart = estart + num_elements
 
- enddo ! loop over colors
-  
+  enddo ! loop over colors
 
- 
-! "stop" timer
-! end_time = omp_get_wtime()
 
-! write(*,*) "Total Elapsed time: ", (end_time-start_time) , "seconds. (Threads=",NUM_THREADS,")"
-! write(*,*) "Accumulate Elapsed time: ", (accumulate_time_stop-accumulate_time_start) , "seconds"
+  ! "stop" timer
+  ! end_time = omp_get_wtime()
 
+  ! write(*,*) "Total Elapsed time: ", (end_time-start_time) , "seconds. (Threads=",NUM_THREADS,")"
+  ! write(*,*) "Accumulate Elapsed time: ", (accumulate_time_stop-accumulate_time_start) , "seconds"
 
+
   ! These are now allocated at the beginning and never deallocated
   ! because the program just finishes at the end.
-  
+
   ! deallocate(dummyx_loc)
   ! deallocate(dummyy_loc)
   ! deallocate(dummyz_loc)
@@ -767,9 +778,9 @@
   ! deallocate(tempz1)
   ! deallocate(tempz2)
   ! deallocate(tempz3)
-  
-! accel(:,:) = accel_omp(:,:,1)
 
-end subroutine compute_forces_elastic_Dev_openmp
+  ! accel(:,:) = accel_omp(:,:,1)
 
+  end subroutine compute_forces_elastic_Dev_openmp
 
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_kernels.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_kernels.f90	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_kernels.f90	2012-02-25 01:37:33 UTC (rev 19680)
@@ -69,88 +69,82 @@
   integer :: i,j,k,ispec,iglob
   real(kind=CUSTOM_REAL), dimension(5) :: epsilondev_loc,b_epsilondev_loc
 
-  ! updates kernels on GPU
-  if(GPU_MODE) then
-    call compute_kernels_elastic_cuda(Mesh_pointer,deltat)
+  if( .not. GPU_MODE ) then
+    ! updates kernels on CPU
+    do ispec = 1, NSPEC_AB
 
-    ! for noise simulations --- source strength kernel
-    if (NOISE_TOMOGRAPHY == 3)  &
-      call compute_kernels_strength_noise(NGLLSQUARE*num_free_surface_faces,ibool, &
-                        sigma_kl,displ,deltat,it, &
-                        normal_x_noise,normal_y_noise,normal_z_noise, &
-                        noise_surface_movie, &
-                        NSPEC_AB,NGLOB_AB, &
-                        num_free_surface_faces,free_surface_ispec,free_surface_ijk,&
-                        GPU_MODE,Mesh_pointer)
+      ! elastic domains
+      if( ispec_is_elastic(ispec) ) then
 
-    ! kernels are done
-    return
-  endif
+         do k = 1, NGLLZ
+            do j = 1, NGLLY
+               do i = 1, NGLLX
+                  iglob = ibool(i,j,k,ispec)
 
-  ! updates kernels on CPU
-  do ispec = 1, NSPEC_AB
+                  ! isotropic kernels
+                  ! note: takes displacement from backward/reconstructed (forward) field b_displ
+                  !          and acceleration from adjoint field accel (containing adjoint sources)
+                  !
+                  ! note: : time integral summation uses deltat
+                  !
+                  ! compare with Tromp et al. (2005), eq. (14), which takes adjoint displacement
+                  ! and forward acceleration, that is the symmetric form of what is calculated here
+                  ! however, this kernel expression is symmetric with regards
+                  ! to interchange adjoint - forward field
+                  rho_kl(i,j,k,ispec) =  rho_kl(i,j,k,ispec) &
+                       + deltat * dot_product(accel(:,iglob), b_displ(:,iglob))
 
-    ! elastic domains
-    if( ispec_is_elastic(ispec) ) then
+                  ! kernel for shear modulus, see e.g. Tromp et al. (2005), equation (17)
+                  ! note: multiplication with 2*mu(x) will be done after the time loop
+                  epsilondev_loc(1) = epsilondev_xx(i,j,k,ispec)
+                  epsilondev_loc(2) = epsilondev_yy(i,j,k,ispec)
+                  epsilondev_loc(3) = epsilondev_xy(i,j,k,ispec)
+                  epsilondev_loc(4) = epsilondev_xz(i,j,k,ispec)
+                  epsilondev_loc(5) = epsilondev_yz(i,j,k,ispec)
 
-       do k = 1, NGLLZ
-          do j = 1, NGLLY
-             do i = 1, NGLLX
-                iglob = ibool(i,j,k,ispec)
+                  b_epsilondev_loc(1) = b_epsilondev_xx(i,j,k,ispec)
+                  b_epsilondev_loc(2) = b_epsilondev_yy(i,j,k,ispec)
+                  b_epsilondev_loc(3) = b_epsilondev_xy(i,j,k,ispec)
+                  b_epsilondev_loc(4) = b_epsilondev_xz(i,j,k,ispec)
+                  b_epsilondev_loc(5) = b_epsilondev_yz(i,j,k,ispec)
 
-                ! isotropic kernels
-                ! note: takes displacement from backward/reconstructed (forward) field b_displ
-                !          and acceleration from adjoint field accel (containing adjoint sources)
-                !
-                ! note: : time integral summation uses deltat
-                !
-                ! compare with Tromp et al. (2005), eq. (14), which takes adjoint displacement
-                ! and forward acceleration, that is the symmetric form of what is calculated here
-                ! however, this kernel expression is symmetric with regards
-                ! to interchange adjoint - forward field
-                rho_kl(i,j,k,ispec) =  rho_kl(i,j,k,ispec) &
-                     + deltat * dot_product(accel(:,iglob), b_displ(:,iglob))
+                  mu_kl(i,j,k,ispec) =  mu_kl(i,j,k,ispec) &
+                       + deltat * (epsilondev_loc(1)*b_epsilondev_loc(1) + epsilondev_loc(2)*b_epsilondev_loc(2) &
+                       + (epsilondev_loc(1)+epsilondev_loc(2)) * (b_epsilondev_loc(1)+b_epsilondev_loc(2)) &
+                       + 2 * (epsilondev_loc(3)*b_epsilondev_loc(3) + epsilondev_loc(4)*b_epsilondev_loc(4) + &
+                       epsilondev_loc(5)*b_epsilondev_loc(5)) )
 
-                ! kernel for shear modulus, see e.g. Tromp et al. (2005), equation (17)
-                ! note: multiplication with 2*mu(x) will be done after the time loop
-                epsilondev_loc(1) = epsilondev_xx(i,j,k,ispec)
-                epsilondev_loc(2) = epsilondev_yy(i,j,k,ispec)
-                epsilondev_loc(3) = epsilondev_xy(i,j,k,ispec)
-                epsilondev_loc(4) = epsilondev_xz(i,j,k,ispec)
-                epsilondev_loc(5) = epsilondev_yz(i,j,k,ispec)
+                  ! kernel for bulk modulus, see e.g. Tromp et al. (2005), equation (18)
+                  ! note: multiplication with kappa(x) will be done after the time loop
+                  kappa_kl(i,j,k,ispec) = kappa_kl(i,j,k,ispec) &
+                       + deltat * (9 * epsilon_trace_over_3(i,j,k,ispec) &
+                       * b_epsilon_trace_over_3(i,j,k,ispec))
 
-                b_epsilondev_loc(1) = b_epsilondev_xx(i,j,k,ispec)
-                b_epsilondev_loc(2) = b_epsilondev_yy(i,j,k,ispec)
-                b_epsilondev_loc(3) = b_epsilondev_xy(i,j,k,ispec)
-                b_epsilondev_loc(4) = b_epsilondev_xz(i,j,k,ispec)
-                b_epsilondev_loc(5) = b_epsilondev_yz(i,j,k,ispec)
+               enddo
+            enddo
+         enddo
+      endif !ispec_is_elastic
 
-                mu_kl(i,j,k,ispec) =  mu_kl(i,j,k,ispec) &
-                     + deltat * (epsilondev_loc(1)*b_epsilondev_loc(1) + epsilondev_loc(2)*b_epsilondev_loc(2) &
-                     + (epsilondev_loc(1)+epsilondev_loc(2)) * (b_epsilondev_loc(1)+b_epsilondev_loc(2)) &
-                     + 2 * (epsilondev_loc(3)*b_epsilondev_loc(3) + epsilondev_loc(4)*b_epsilondev_loc(4) + &
-                     epsilondev_loc(5)*b_epsilondev_loc(5)) )
+    enddo
 
-                ! kernel for bulk modulus, see e.g. Tromp et al. (2005), equation (18)
-                ! note: multiplication with kappa(x) will be done after the time loop
-                kappa_kl(i,j,k,ispec) = kappa_kl(i,j,k,ispec) &
-                     + deltat * (9 * epsilon_trace_over_3(i,j,k,ispec) &
-                     * b_epsilon_trace_over_3(i,j,k,ispec))
+  else
+    ! updates kernels on GPU
+    call compute_kernels_elastic_cuda(Mesh_pointer,deltat)
+  endif
 
-             enddo
-          enddo
-       enddo
-    endif !ispec_is_elastic
 
-  enddo
-
   ! moho kernel
   if( SAVE_MOHO_MESH ) then
-      call compute_boundary_kernel()
+    if( GPU_MODE ) then
+      call transfer_accel_from_device(NDIM*NGLOB_AB,accel,Mesh_pointer)
+      call transfer_b_displ_from_device(NDIM*NGLOB_AB,b_displ,Mesh_pointer)
+    endif
+    ! updates on CPU
+    call compute_boundary_kernel()
   endif
 
   ! for noise simulations --- source strength kernel
-  if (NOISE_TOMOGRAPHY == 3)  &
+  if (NOISE_TOMOGRAPHY == 3) then
     call compute_kernels_strength_noise(NGLLSQUARE*num_free_surface_faces,ibool, &
                         sigma_kl,displ,deltat,it, &
                         normal_x_noise,normal_y_noise,normal_z_noise, &
@@ -158,6 +152,7 @@
                         NSPEC_AB,NGLOB_AB, &
                         num_free_surface_faces,free_surface_ispec,free_surface_ijk,&
                         GPU_MODE,Mesh_pointer)
+  endif
 
   end subroutine compute_kernels_el
 

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90	2012-02-25 01:37:33 UTC (rev 19680)
@@ -264,20 +264,20 @@
     NZ_IMAGE_color = NZ_IMAGE_color * zoom_factor
     zoom = .true.
   endif
-  
+
   ! create all the pixels
   if( NX_IMAGE_color /= 0 ) then
     size_pixel_horizontal = (xmax_color_image - xmin_color_image) / dble(NX_IMAGE_color)
   else
     size_pixel_horizontal = 0.0
   endif
-  
+
   if( NZ_IMAGE_color /= 0 ) then
     size_pixel_vertical = (zmax_color_image - zmin_color_image) / dble(NZ_IMAGE_color)
   else
     size_pixel_vertical = 0.0
   endif
-  
+
   if (myrank == 0) then
     write(IMAIN,*) '  image points: ',npgeo_glob
     write(IMAIN,*) '  xmin/xmax: ',xmin_color_image,'/',xmax_color_image
@@ -309,7 +309,7 @@
     distance_z1 = 0.0
     distance_z2 = 2.0*size_pixel_vertical
   endif
-  
+
   do j=1,NZ_IMAGE_color
     do i=1,NX_IMAGE_color
       ! calculates midpoint of pixel

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/initialize_simulation.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/initialize_simulation.f90	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/initialize_simulation.f90	2012-02-25 01:37:33 UTC (rev 19680)
@@ -149,13 +149,13 @@
           gammaz(NGLLX,NGLLY,NGLLZ,NSPEC_AB), &
           jacobian(NGLLX,NGLLY,NGLLZ,NSPEC_AB),stat=ier)
   if( ier /= 0 ) stop 'error allocating arrays for databases'
-  
+
   ! mesh node locations
   allocate(xstore(NGLOB_AB), &
           ystore(NGLOB_AB), &
           zstore(NGLOB_AB),stat=ier)
-  if( ier /= 0 ) stop 'error allocating arrays for mesh nodes'  
-  
+  if( ier /= 0 ) stop 'error allocating arrays for mesh nodes'
+
   ! material properties
   allocate(kappastore(NGLLX,NGLLY,NGLLZ,NSPEC_AB), &
           mustore(NGLLX,NGLLY,NGLLZ,NSPEC_AB),stat=ier)

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/iterate_time.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/iterate_time.f90	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/iterate_time.f90	2012-02-25 01:37:33 UTC (rev 19680)
@@ -91,7 +91,7 @@
     endif
 
     ! write the seismograms with time shift (GPU_MODE transfer included)
-    if (nrec_local > 0) then
+    if (nrec_local > 0 .or. ( WRITE_SEISMOGRAMS_BY_MASTER .and. myrank == 0 ) ) then
       call write_seismograms()
     endif
 
@@ -109,18 +109,16 @@
     if( MOVIE_SIMULATION ) then
       call write_movie_output()
     endif
-    
+
     ! first step of noise tomography, i.e., save a surface movie at every time step
-    if ( NOISE_TOMOGRAPHY == 1) then       
-       if( num_free_surface_faces == 0) then
-       else
-          call noise_save_surface_movie(displ, &
-               ibool, &
-               noise_surface_movie,it, &
-               NSPEC_AB,NGLOB_AB, &
-               num_free_surface_faces,free_surface_ispec,free_surface_ijk,&
-               Mesh_pointer,GPU_MODE)
-       endif
+    if ( NOISE_TOMOGRAPHY == 1) then
+      if( num_free_surface_faces > 0) then
+        call noise_save_surface_movie(displ,ibool, &
+                            noise_surface_movie,it, &
+                            NSPEC_AB,NGLOB_AB, &
+                            num_free_surface_faces,free_surface_ispec,free_surface_ijk, &
+                            Mesh_pointer,GPU_MODE)
+      endif
     endif
 
 !
@@ -129,7 +127,7 @@
   enddo   ! end of main time loop
 
   call it_print_elapsed_time()
-  
+
   ! Transfer fields from GPU card to host for further analysis
   if(GPU_MODE) call it_transfer_from_GPU()
 
@@ -137,18 +135,17 @@
 
 
 !=====================================================================
-  
+
   subroutine it_print_elapsed_time()
     use specfem_par
     use specfem_par_elastic
     use specfem_par_acoustic
     implicit none
 
-    double precision :: tCPU,t_remain,t_total
-    integer :: ihours,iminutes,iseconds,int_tCPU, &
-         ihours_remain,iminutes_remain,iseconds_remain,int_t_remain, &
-         ihours_total,iminutes_total,iseconds_total,int_t_total
-    
+    ! local parameters
+    double precision :: tCPU
+    integer :: ihours,iminutes,iseconds,int_tCPU
+
     if(myrank == 0) then
        ! elapsed time since beginning of the simulation
        tCPU = wtime() - time_start
@@ -385,6 +382,7 @@
   if( ACOUSTIC_SIMULATION ) then
 
     if(.NOT. GPU_MODE) then
+      ! on CPU
       potential_acoustic(:) = potential_acoustic(:) &
                             + deltat * potential_dot_acoustic(:) &
                             + deltatsqover2 * potential_dot_dot_acoustic(:)

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/noise_tomography.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/noise_tomography.f90	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/noise_tomography.f90	2012-02-25 01:37:33 UTC (rev 19680)
@@ -447,7 +447,7 @@
 
   if (NOISE_TOMOGRAPHY/=0) then
      ! save/read the surface movie using the same c routine as we do for absorbing boundaries (file ID is 2)
-     
+
      ! size of single record
      reclen=CUSTOM_REAL*NDIM*NGLLSQUARE*NSPEC_TOP
 
@@ -459,7 +459,7 @@
            print *,'reclen of noise surface_movie needed exceeds integer 4-byte limit: ',reclen
            print *,'  ',CUSTOM_REAL, NDIM, NGLLSQUARE, NSPEC_TOP
            print*,'bit size fortran: ',bit_size(NSPEC_TOP)
-           call exit_MPI(myrank,"error NSPEC_TOP integer limit")            
+           call exit_MPI(myrank,"error NSPEC_TOP integer limit")
         endif
 
         ! total file size

Copied: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.F90 (from rev 19670, seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90)
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.F90	                        (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.F90	2012-02-25 01:37:33 UTC (rev 19680)
@@ -0,0 +1,1106 @@
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+!
+! United States and French Government Sponsorship Acknowledged.
+
+  subroutine prepare_timerun()
+
+  use specfem_par
+  use specfem_par_acoustic
+  use specfem_par_elastic
+  use specfem_par_poroelastic
+  use specfem_par_movie
+
+  implicit none
+  character(len=256) :: plot_file
+  integer :: ier
+
+  ! flag for any movie simulation
+  if( EXTERNAL_MESH_MOVIE_SURFACE .or. EXTERNAL_MESH_CREATE_SHAKEMAP .or. &
+     MOVIE_SURFACE .or. CREATE_SHAKEMAP .or. MOVIE_VOLUME .or. PNM_GIF_IMAGE ) then
+    MOVIE_SIMULATION = .true.
+  else
+    MOVIE_SIMULATION = .false.
+  endif
+
+  ! user info
+  if(myrank == 0) then
+
+    write(IMAIN,*)
+    if(ATTENUATION) then
+      write(IMAIN,*) 'incorporating attenuation using ',N_SLS,' standard linear solids'
+      if(USE_OLSEN_ATTENUATION) then
+        write(IMAIN,*) 'using Olsen''s attenuation'
+      else
+        write(IMAIN,*) 'not using Olsen''s attenuation'
+      endif
+    else
+      write(IMAIN,*) 'no attenuation'
+    endif
+
+    write(IMAIN,*)
+    if(ANISOTROPY) then
+      write(IMAIN,*) 'incorporating anisotropy'
+    else
+      write(IMAIN,*) 'no anisotropy'
+    endif
+
+    write(IMAIN,*)
+    if(OCEANS) then
+      write(IMAIN,*) 'incorporating the oceans using equivalent load'
+    else
+      write(IMAIN,*) 'no oceans'
+    endif
+
+    write(IMAIN,*)
+    if(GRAVITY) then
+      write(IMAIN,*) 'incorporating gravity'
+    else
+      write(IMAIN,*) 'no gravity'
+    endif
+
+    write(IMAIN,*)
+    if(ACOUSTIC_SIMULATION) then
+      write(IMAIN,*) 'incorporating acoustic simulation'
+    else
+      write(IMAIN,*) 'no acoustic simulation'
+    endif
+
+    write(IMAIN,*)
+    if(ELASTIC_SIMULATION) then
+      write(IMAIN,*) 'incorporating elastic simulation'
+    else
+      write(IMAIN,*) 'no elastic simulation'
+    endif
+
+    write(IMAIN,*)
+    if(POROELASTIC_SIMULATION) then
+      write(IMAIN,*) 'incorporating poroelastic simulation'
+    else
+      write(IMAIN,*) 'no poroelastic simulation'
+    endif
+    write(IMAIN,*)
+
+    write(IMAIN,*)
+    if(MOVIE_SIMULATION) then
+      write(IMAIN,*) 'incorporating movie simulation'
+    else
+      write(IMAIN,*) 'no movie simulation'
+    endif
+    write(IMAIN,*)
+
+  endif
+
+  ! synchronize all the processes before assembling the mass matrix
+  ! to make sure all the nodes have finished to read their databases
+  call sync_all()
+
+  ! sets up mass matrices
+  call prepare_timerun_mass_matrices()
+
+  ! initialize acoustic arrays to zero
+  if( ACOUSTIC_SIMULATION ) then
+    potential_acoustic(:) = 0._CUSTOM_REAL
+    potential_dot_acoustic(:) = 0._CUSTOM_REAL
+    potential_dot_dot_acoustic(:) = 0._CUSTOM_REAL
+    ! put negligible initial value to avoid very slow underflow trapping
+    if(FIX_UNDERFLOW_PROBLEM) potential_acoustic(:) = VERYSMALLVAL
+  endif
+
+  ! initialize elastic arrays to zero/verysmallvall
+  if( ELASTIC_SIMULATION ) then
+    displ(:,:) = 0._CUSTOM_REAL
+    veloc(:,:) = 0._CUSTOM_REAL
+    accel(:,:) = 0._CUSTOM_REAL
+    ! put negligible initial value to avoid very slow underflow trapping
+    if(FIX_UNDERFLOW_PROBLEM) displ(:,:) = VERYSMALLVAL
+  endif
+
+
+  ! distinguish between single and double precision for reals
+  if(CUSTOM_REAL == SIZE_REAL) then
+    deltat = sngl(DT)
+  else
+    deltat = DT
+  endif
+  deltatover2 = deltat/2._CUSTOM_REAL
+  deltatsqover2 = deltat*deltat/2._CUSTOM_REAL
+
+  ! seismograms
+  if (nrec_local > 0) then
+    ! allocate seismogram array
+    allocate(seismograms_d(NDIM,nrec_local,NSTEP),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array seismograms_d'
+    allocate(seismograms_v(NDIM,nrec_local,NSTEP),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array seismograms_v'
+    allocate(seismograms_a(NDIM,nrec_local,NSTEP),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array seismograms_a'
+
+    ! initialize seismograms
+    seismograms_d(:,:,:) = 0._CUSTOM_REAL
+    seismograms_v(:,:,:) = 0._CUSTOM_REAL
+    seismograms_a(:,:,:) = 0._CUSTOM_REAL
+  endif
+
+  ! synchronize all the processes
+  call sync_all()
+
+  ! prepares attenuation arrays
+  call prepare_timerun_attenuation()
+
+  ! prepares gravity arrays
+  call prepare_timerun_gravity()
+
+  ! initializes PML arrays
+  if( ABSORBING_CONDITIONS  ) then
+    if (SIMULATION_TYPE /= 1 .and. ABSORB_USE_PML )  then
+      write(IMAIN,*) 'NOTE: adjoint simulations and PML not supported yet...'
+    else
+      if( ABSORB_USE_PML ) then
+        call PML_initialize()
+      endif
+    endif
+  endif
+
+  ! opens source time function file
+  if(PRINT_SOURCE_TIME_FUNCTION .and. myrank == 0) then
+    ! print the source-time function
+    if(NSOURCES == 1) then
+      plot_file = '/plot_source_time_function.txt'
+    else
+     if(NSOURCES < 10) then
+        write(plot_file,"('/plot_source_time_function',i1,'.txt')") NSOURCES
+      else
+        write(plot_file,"('/plot_source_time_function',i2,'.txt')") NSOURCES
+      endif
+    endif
+    open(unit=IOSTF,file=trim(OUTPUT_FILES)//plot_file,status='unknown')
+  endif
+
+  ! user output
+  if(myrank == 0) then
+    write(IMAIN,*)
+    write(IMAIN,*) '           time step: ',sngl(DT),' s'
+    write(IMAIN,*) 'number of time steps: ',NSTEP
+    write(IMAIN,*) 'total simulated time: ',sngl(NSTEP*DT),' seconds'
+    write(IMAIN,*)
+  endif
+
+  ! prepares ADJOINT simulations
+  call prepare_timerun_adjoint()
+
+  ! prepares noise simulations
+  call prepare_timerun_noise()
+
+  ! prepares GPU arrays
+  if(GPU_MODE) call prepare_timerun_GPU()
+
+#ifdef OPENMP_MODE
+  ! prepares arrays for OpenMP
+  call prepare_timerun_OpenMP()
+#endif
+
+  end subroutine prepare_timerun
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine prepare_timerun_mass_matrices()
+
+  use specfem_par
+  use specfem_par_acoustic
+  use specfem_par_elastic
+  use specfem_par_poroelastic
+  implicit none
+
+! the mass matrix needs to be assembled with MPI here once and for all
+  if(ACOUSTIC_SIMULATION) then
+    call assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,rmass_acoustic, &
+                        num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
+                        my_neighbours_ext_mesh)
+
+    ! fill mass matrix with fictitious non-zero values to make sure it can be inverted globally
+    where(rmass_acoustic <= 0._CUSTOM_REAL) rmass_acoustic = 1._CUSTOM_REAL
+    rmass_acoustic(:) = 1._CUSTOM_REAL / rmass_acoustic(:)
+
+  endif ! ACOUSTIC_SIMULATION
+
+  if(ELASTIC_SIMULATION) then
+    call assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,rmass, &
+                        num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+                        my_neighbours_ext_mesh)
+
+    ! fill mass matrix with fictitious non-zero values to make sure it can be inverted globally
+    where(rmass <= 0._CUSTOM_REAL) rmass = 1._CUSTOM_REAL
+    rmass(:) = 1._CUSTOM_REAL / rmass(:)
+
+    if(OCEANS ) then
+      if( minval(rmass_ocean_load(:)) <= 0._CUSTOM_REAL) &
+        call exit_MPI(myrank,'negative ocean load mass matrix term')
+      rmass_ocean_load(:) = 1. / rmass_ocean_load(:)
+    endif
+
+  endif ! ELASTIC_SIMULATION
+
+  if(POROELASTIC_SIMULATION) then
+
+    stop 'poroelastic simulation not implemented yet'
+    ! but would be something like this...
+    call assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,rmass_solid_poroelastic, &
+                        num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+                        my_neighbours_ext_mesh)
+
+    call assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,rmass_fluid_poroelastic, &
+                        num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+                        my_neighbours_ext_mesh)
+
+    ! fills mass matrix with fictitious non-zero values to make sure it can be inverted globally
+    where(rmass_solid_poroelastic <= 0._CUSTOM_REAL) rmass_solid_poroelastic = 1._CUSTOM_REAL
+    where(rmass_fluid_poroelastic <= 0._CUSTOM_REAL) rmass_fluid_poroelastic = 1._CUSTOM_REAL
+    rmass_solid_poroelastic(:) = 1._CUSTOM_REAL / rmass_solid_poroelastic(:)
+    rmass_fluid_poroelastic(:) = 1._CUSTOM_REAL / rmass_fluid_poroelastic(:)
+
+  endif ! POROELASTIC_SIMULATION
+
+  if(myrank == 0) write(IMAIN,*) 'end assembling MPI mass matrix'
+
+
+  end subroutine prepare_timerun_mass_matrices
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine prepare_timerun_attenuation()
+
+  use specfem_par
+  use specfem_par_acoustic
+  use specfem_par_elastic
+  use specfem_par_poroelastic
+  implicit none
+
+  ! local parameters
+  double precision, dimension(N_SLS) :: tau_sigma_dble
+  double precision :: f_c_source
+  double precision :: MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD
+  real(kind=CUSTOM_REAL):: scale_factorl
+  integer :: i,j,k,ispec,ier
+  real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: scale_factor
+
+  ! if attenuation is on, shift shear moduli to center frequency of absorption period band, i.e.
+  ! rescale mu to average (central) frequency for attenuation
+  if(ATTENUATION) then
+
+    ! initializes arrays
+    one_minus_sum_beta(:,:,:,:) = 1._CUSTOM_REAL
+    factor_common(:,:,:,:,:) = 1._CUSTOM_REAL
+
+    allocate( scale_factor(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB),stat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error allocation scale_factor')
+    scale_factor(:,:,:,:) = 1._CUSTOM_REAL
+
+    ! reads in attenuation arrays
+    open(unit=27, file=prname(1:len_trim(prname))//'attenuation.bin', &
+          status='old',action='read',form='unformatted',iostat=ier)
+    if( ier /= 0 ) then
+      print*,'error: could not open ',prname(1:len_trim(prname))//'attenuation.bin'
+      call exit_mpi(myrank,'error opening attenuation.bin file')
+    endif
+    read(27) ispec
+    if( ispec /= NSPEC_ATTENUATION_AB ) then
+      close(27)
+      print*,'error: attenuation file array ',ispec,'should be ',NSPEC_ATTENUATION_AB
+      call exit_mpi(myrank,'error attenuation array dimensions, please recompile and rerun generate_databases')
+    endif
+    read(27) one_minus_sum_beta
+    read(27) factor_common
+    read(27) scale_factor
+    close(27)
+
+
+    ! gets stress relaxation times tau_sigma, i.e.
+    ! precalculates tau_sigma depending on period band (constant for all Q_mu), and
+    ! determines central frequency f_c_source of attenuation period band
+    call get_attenuation_constants(min_resolved_period,tau_sigma_dble,&
+              f_c_source,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
+
+    ! determines alphaval,betaval,gammaval for runge-kutta scheme
+    if(CUSTOM_REAL == SIZE_REAL) then
+      tau_sigma(:) = sngl(tau_sigma_dble(:))
+    else
+      tau_sigma(:) = tau_sigma_dble(:)
+    endif
+    call get_attenuation_memory_values(tau_sigma,deltat,alphaval,betaval,gammaval)
+
+    ! shifts shear moduli
+    do ispec = 1,NSPEC_AB
+
+      ! skips non elastic elements
+      if( ispec_is_elastic(ispec) .eqv. .false. ) cycle
+
+      ! determines attenuation factors for each GLL point
+      do k=1,NGLLZ
+        do j=1,NGLLY
+          do i=1,NGLLX
+
+            ! scales only mu moduli
+            scale_factorl = scale_factor(i,j,k,ispec)
+            mustore(i,j,k,ispec) = mustore(i,j,k,ispec) * scale_factorl
+
+          enddo
+        enddo
+      enddo
+    enddo
+
+    deallocate(scale_factor)
+
+    ! statistics
+    ! user output
+    if( myrank == 0 ) then
+      write(IMAIN,*)
+      write(IMAIN,*) "attenuation: "
+      write(IMAIN,*) "  reference period (s)   : ",sngl(1.0/ATTENUATION_f0_REFERENCE), &
+                    " frequency: ",sngl(ATTENUATION_f0_REFERENCE)
+      write(IMAIN,*) "  period band min/max (s): ",sngl(MIN_ATTENUATION_PERIOD),sngl(MAX_ATTENUATION_PERIOD)
+      write(IMAIN,*) "  central period (s)     : ",sngl(1.0/f_c_source), &
+                    " frequency: ",sngl(f_c_source)
+      write(IMAIN,*)
+    endif
+
+    ! clear memory variables if attenuation
+    ! initialize memory variables for attenuation
+    epsilondev_xx(:,:,:,:) = 0._CUSTOM_REAL
+    epsilondev_yy(:,:,:,:) = 0._CUSTOM_REAL
+    epsilondev_xy(:,:,:,:) = 0._CUSTOM_REAL
+    epsilondev_xz(:,:,:,:) = 0._CUSTOM_REAL
+    epsilondev_yz(:,:,:,:) = 0._CUSTOM_REAL
+
+    R_xx(:,:,:,:,:) = 0._CUSTOM_REAL
+    R_yy(:,:,:,:,:) = 0._CUSTOM_REAL
+    R_xy(:,:,:,:,:) = 0._CUSTOM_REAL
+    R_xz(:,:,:,:,:) = 0._CUSTOM_REAL
+    R_yz(:,:,:,:,:) = 0._CUSTOM_REAL
+
+    if(FIX_UNDERFLOW_PROBLEM) then
+      R_xx(:,:,:,:,:) = VERYSMALLVAL
+      R_yy(:,:,:,:,:) = VERYSMALLVAL
+      R_xy(:,:,:,:,:) = VERYSMALLVAL
+      R_xz(:,:,:,:,:) = VERYSMALLVAL
+      R_yz(:,:,:,:,:) = VERYSMALLVAL
+    endif
+  endif
+
+  end subroutine prepare_timerun_attenuation
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine prepare_timerun_gravity()
+
+! precomputes gravity factors
+
+  use specfem_par
+  use specfem_par_acoustic
+  use specfem_par_elastic
+  use specfem_par_poroelastic
+  implicit none
+
+  ! local parameters
+  double precision RICB,RCMB,RTOPDDOUBLEPRIME, &
+    R80,R220,R400,R600,R670,R771,RMOHO,RMIDDLE_CRUST,ROCEAN
+  double precision :: rspl_gravity(NR),gspl(NR),gspl2(NR)
+  double precision :: radius,g,dg ! radius_km
+  !double precision :: g_cmb_dble,g_icb_dble
+  double precision :: rho,drhodr,vp,vs,Qkappa,Qmu
+  integer :: nspl_gravity !int_radius
+  integer :: i,j,k,iglob,ier
+
+  ! sets up weights needed for integration of gravity
+  do k=1,NGLLZ
+    do j=1,NGLLY
+      do i=1,NGLLX
+        wgll_cube(i,j,k) = sngl( wxgll(i)*wygll(j)*wzgll(k) )
+      enddo
+    enddo
+  enddo
+
+  ! store g, rho and dg/dr=dg using normalized radius in lookup table every 100 m
+  ! get density and velocity from PREM model using dummy doubling flag
+  ! this assumes that the gravity perturbations are small and smooth
+  ! and that we can neglect the 3D model and use PREM every 100 m in all cases
+  ! this is probably a rather reasonable assumption
+  if(GRAVITY) then
+
+    ! allocates gravity arrays
+    allocate( minus_deriv_gravity(NGLOB_AB), &
+             minus_g(NGLOB_AB), stat=ier)
+    if( ier /= 0 ) stop 'error allocating gravity arrays'
+
+    ! sets up spline table
+    call make_gravity(nspl_gravity,rspl_gravity,gspl,gspl2, &
+                          ROCEAN,RMIDDLE_CRUST,RMOHO,R80,R220,R400,R600,R670, &
+                          R771,RTOPDDOUBLEPRIME,RCMB,RICB)
+
+    ! pre-calculates gravity terms for all global points
+    do iglob = 1,NGLOB_AB
+
+      ! normalized radius ( zstore values given in m, negative values for depth)
+      radius = ( R_EARTH + zstore(iglob) ) / R_EARTH
+      call spline_evaluation(rspl_gravity,gspl,gspl2,nspl_gravity,radius,g)
+
+      ! use PREM density profile to calculate gravity (fine for other 1D models)
+      call model_prem_iso(radius,rho,drhodr,vp,vs,Qkappa,Qmu, &
+                        RICB,RCMB,RTOPDDOUBLEPRIME, &
+                        R600,R670,R220,R771,R400,R80,RMOHO,RMIDDLE_CRUST,ROCEAN)
+
+      dg = 4.0d0*rho - 2.0d0*g/radius
+
+      ! re-dimensionalize
+      g = g * R_EARTH*(PI*GRAV*RHOAV) ! in m / s^2 ( should be around 10 m/s^2 )
+      dg = dg * R_EARTH*(PI*GRAV*RHOAV) / R_EARTH ! gradient d/dz g , in 1/s^2
+
+      minus_deriv_gravity(iglob) = - dg
+      minus_g(iglob) = - g ! in negative z-direction
+
+      ! debug
+      !if( iglob == 1 .or. iglob == 1000 .or. iglob == 10000 ) then
+      !  ! re-dimensionalize
+      !  radius = radius * R_EARTH ! in m
+      !  vp = vp * R_EARTH*dsqrt(PI*GRAV*RHOAV)  ! in m / s
+      !  rho = rho  * RHOAV  ! in kg / m^3
+      !  print*,'gravity: radius=',radius,'g=',g,'depth=',radius-R_EARTH
+      !  print*,'vp=',vp,'rho=',rho,'kappa=',(vp**2) * rho
+      !  print*,'minus_g..=',minus_g(iglob)
+      !endif
+    enddo
+
+  else
+
+    ! allocates dummy gravity arrays
+    allocate( minus_deriv_gravity(0), &
+             minus_g(0), stat=ier)
+    if( ier /= 0 ) stop 'error allocating gravity arrays'
+
+  endif
+
+  end subroutine prepare_timerun_gravity
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine prepare_timerun_adjoint()
+
+! prepares adjoint simulations
+
+  use specfem_par
+  use specfem_par_acoustic
+  use specfem_par_elastic
+  use specfem_par_poroelastic
+  implicit none
+  ! local parameters
+  integer :: ier
+  integer(kind=8) :: filesize
+
+! seismograms
+  if (nrec_local > 0 .and. SIMULATION_TYPE == 2 ) then
+    ! allocate Frechet derivatives array
+    allocate(Mxx_der(nrec_local),Myy_der(nrec_local), &
+            Mzz_der(nrec_local),Mxy_der(nrec_local), &
+            Mxz_der(nrec_local),Myz_der(nrec_local), &
+            sloc_der(NDIM,nrec_local),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array Mxx_der and following arrays'
+    Mxx_der = 0._CUSTOM_REAL
+    Myy_der = 0._CUSTOM_REAL
+    Mzz_der = 0._CUSTOM_REAL
+    Mxy_der = 0._CUSTOM_REAL
+    Mxz_der = 0._CUSTOM_REAL
+    Myz_der = 0._CUSTOM_REAL
+    sloc_der = 0._CUSTOM_REAL
+
+    allocate(seismograms_eps(NDIM,NDIM,nrec_local,NSTEP),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array seismograms_eps'
+    seismograms_eps(:,:,:,:) = 0._CUSTOM_REAL
+  endif
+
+! timing
+  if (SIMULATION_TYPE == 3) then
+
+    ! backward/reconstructed wavefields: time stepping is in time-reversed sense
+    ! (negative time increments)
+    if(CUSTOM_REAL == SIZE_REAL) then
+      b_deltat = - sngl(DT)
+    else
+      b_deltat = - DT
+    endif
+    b_deltatover2 = b_deltat/2._CUSTOM_REAL
+    b_deltatsqover2 = b_deltat*b_deltat/2._CUSTOM_REAL
+
+  endif
+
+! attenuation backward memories
+  if( ATTENUATION .and. SIMULATION_TYPE == 3 ) then
+
+    ! precompute Runge-Kutta coefficients if attenuation
+    call get_attenuation_memory_values(tau_sigma,b_deltat,b_alphaval,b_betaval,b_gammaval)
+
+  endif
+
+! initializes adjoint kernels and reconstructed/backward wavefields
+  if (SIMULATION_TYPE == 3)  then
+    ! elastic domain
+    if( ELASTIC_SIMULATION ) then
+      rho_kl(:,:,:,:)   = 0._CUSTOM_REAL
+      mu_kl(:,:,:,:)    = 0._CUSTOM_REAL
+      kappa_kl(:,:,:,:) = 0._CUSTOM_REAL
+
+      if ( APPROXIMATE_HESS_KL ) &
+        hess_kl(:,:,:,:)   = 0._CUSTOM_REAL
+
+      ! reconstructed/backward elastic wavefields
+      b_displ = 0._CUSTOM_REAL
+      b_veloc = 0._CUSTOM_REAL
+      b_accel = 0._CUSTOM_REAL
+      if(FIX_UNDERFLOW_PROBLEM) b_displ = VERYSMALLVAL
+
+      ! memory variables if attenuation
+      if( ATTENUATION ) then
+         b_R_xx = 0._CUSTOM_REAL
+         b_R_yy = 0._CUSTOM_REAL
+         b_R_xy = 0._CUSTOM_REAL
+         b_R_xz = 0._CUSTOM_REAL
+         b_R_yz = 0._CUSTOM_REAL
+         b_epsilondev_xx = 0._CUSTOM_REAL
+         b_epsilondev_yy = 0._CUSTOM_REAL
+         b_epsilondev_xy = 0._CUSTOM_REAL
+         b_epsilondev_xz = 0._CUSTOM_REAL
+         b_epsilondev_yz = 0._CUSTOM_REAL
+      endif
+
+    endif
+
+    ! acoustic domain
+    if( ACOUSTIC_SIMULATION ) then
+      rho_ac_kl(:,:,:,:)   = 0._CUSTOM_REAL
+      kappa_ac_kl(:,:,:,:) = 0._CUSTOM_REAL
+
+      if ( APPROXIMATE_HESS_KL ) &
+        hess_ac_kl(:,:,:,:)   = 0._CUSTOM_REAL
+
+      ! reconstructed/backward acoustic potentials
+      b_potential_acoustic = 0._CUSTOM_REAL
+      b_potential_dot_acoustic = 0._CUSTOM_REAL
+      b_potential_dot_dot_acoustic = 0._CUSTOM_REAL
+      if(FIX_UNDERFLOW_PROBLEM) b_potential_acoustic = VERYSMALLVAL
+
+    endif
+  endif
+
+! initialize Moho boundary index
+  if (SAVE_MOHO_MESH .and. SIMULATION_TYPE == 3) then
+    ispec2D_moho_top = 0
+    ispec2D_moho_bot = 0
+  endif
+
+! stacey absorbing fields will be reconstructed for adjoint simulations
+! using snapshot files of wavefields
+  if( ABSORBING_CONDITIONS ) then
+
+    ! opens absorbing wavefield saved/to-be-saved by forward simulations
+    if( num_abs_boundary_faces > 0 .and. (SIMULATION_TYPE == 3 .or. &
+          (SIMULATION_TYPE == 1 .and. SAVE_FORWARD)) ) then
+
+      b_num_abs_boundary_faces = num_abs_boundary_faces
+
+      ! elastic domains
+      if( ELASTIC_SIMULATION) then
+        ! allocates wavefield
+        allocate(b_absorb_field(NDIM,NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
+        if( ier /= 0 ) stop 'error allocating array b_absorb_field'
+
+        ! size of single record
+        b_reclen_field = CUSTOM_REAL * NDIM * NGLLSQUARE * num_abs_boundary_faces
+
+        ! check integer size limit: size of b_reclen_field must fit onto an 4-byte integer
+        if( num_abs_boundary_faces > 2147483647 / (CUSTOM_REAL * NDIM * NGLLSQUARE) ) then
+          print *,'reclen needed exceeds integer 4-byte limit: ',b_reclen_field
+          print *,'  ',CUSTOM_REAL, NDIM, NGLLSQUARE, num_abs_boundary_faces
+          print*,'bit size fortran: ',bit_size(b_reclen_field)
+          call exit_MPI(myrank,"error b_reclen_field integer limit")
+        endif
+
+        ! total file size
+        filesize = b_reclen_field
+        filesize = filesize*NSTEP
+
+        if (SIMULATION_TYPE == 3) then
+          ! opens existing files
+
+          ! uses fortran routines for reading
+          !open(unit=IOABS,file=trim(prname)//'absorb_field.bin',status='old',&
+          !      action='read',form='unformatted',access='direct', &
+          !      recl=b_reclen_field+2*4,iostat=ier )
+          !if( ier /= 0 ) call exit_mpi(myrank,'error opening proc***_absorb_field.bin file')
+          ! uses c routines for faster reading
+          call open_file_abs_r(0,trim(prname)//'absorb_field.bin', &
+                              len_trim(trim(prname)//'absorb_field.bin'), &
+                              filesize)
+
+        else
+          ! opens new file
+          ! uses fortran routines for writing
+          !open(unit=IOABS,file=trim(prname)//'absorb_field.bin',status='unknown',&
+          !      form='unformatted',access='direct',&
+          !      recl=b_reclen_field+2*4,iostat=ier )
+          !if( ier /= 0 ) call exit_mpi(myrank,'error opening proc***_absorb_field.bin file')
+          ! uses c routines for faster writing (file index 0 for acoutic domain file)
+          call open_file_abs_w(0,trim(prname)//'absorb_field.bin', &
+                              len_trim(trim(prname)//'absorb_field.bin'), &
+                              filesize)
+
+        endif
+      endif
+
+      ! acoustic domains
+      if( ACOUSTIC_SIMULATION) then
+        ! allocates wavefield
+        allocate(b_absorb_potential(NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
+        if( ier /= 0 ) stop 'error allocating array b_absorb_potential'
+
+        ! size of single record
+        b_reclen_potential = CUSTOM_REAL * NGLLSQUARE * num_abs_boundary_faces
+
+        ! check integer size limit: size of b_reclen_potential must fit onto an 4-byte integer
+        if( num_abs_boundary_faces > 2147483647 / (CUSTOM_REAL * NGLLSQUARE) ) then
+          print *,'reclen needed exceeds integer 4-byte limit: ',b_reclen_potential
+          print *,'  ',CUSTOM_REAL, NGLLSQUARE, num_abs_boundary_faces
+          print*,'bit size fortran: ',bit_size(b_reclen_potential)
+          call exit_MPI(myrank,"error b_reclen_potential integer limit")
+        endif
+
+        ! total file size (two lines to implicitly convert to 8-byte integers)
+        filesize = b_reclen_potential
+        filesize = filesize*NSTEP
+
+        ! debug check size limit
+        !if( NSTEP > 2147483647 / b_reclen_potential ) then
+        !  print *,'file size needed exceeds integer 4-byte limit: ',b_reclen_potential,NSTEP
+        !  print *,'  ',CUSTOM_REAL, NGLLSQUARE, num_abs_boundary_faces,NSTEP
+        !  print*,'file size fortran: ',filesize
+        !  print*,'file bit size fortran: ',bit_size(filesize)
+        !endif
+
+        if (SIMULATION_TYPE == 3) then
+          ! opens existing files
+          ! uses fortran routines for reading
+          !open(unit=IOABS_AC,file=trim(prname)//'absorb_potential.bin',status='old',&
+          !      action='read',form='unformatted',access='direct', &
+          !      recl=b_reclen_potential+2*4,iostat=ier )
+          !if( ier /= 0 ) call exit_mpi(myrank,'error opening proc***_absorb_potential.bin file')
+
+          ! uses c routines for faster reading
+          call open_file_abs_r(1,trim(prname)//'absorb_potential.bin', &
+                              len_trim(trim(prname)//'absorb_potential.bin'), &
+                              filesize)
+
+        else
+          ! opens new file
+          ! uses fortran routines for writing
+          !open(unit=IOABS_AC,file=trim(prname)//'absorb_potential.bin',status='unknown',&
+          !      form='unformatted',access='direct',&
+          !      recl=b_reclen_potential+2*4,iostat=ier )
+          !if( ier /= 0 ) call exit_mpi(myrank,'error opening proc***_absorb_potential.bin file')
+          ! uses c routines for faster writing (file index 1 for acoutic domain file)
+          call open_file_abs_w(1,trim(prname)//'absorb_potential.bin', &
+                              len_trim(trim(prname)//'absorb_potential.bin'), &
+                              filesize)
+
+        endif
+      endif
+    else
+      ! needs dummy array
+      b_num_abs_boundary_faces = 1
+      if( ELASTIC_SIMULATION ) then
+        allocate(b_absorb_field(NDIM,NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
+        if( ier /= 0 ) stop 'error allocating array b_absorb_field'
+      endif
+
+      if( ACOUSTIC_SIMULATION ) then
+        allocate(b_absorb_potential(NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
+        if( ier /= 0 ) stop 'error allocating array b_absorb_potential'
+      endif
+    endif
+  else ! ABSORBING_CONDITIONS
+    ! needs dummy array
+    b_num_abs_boundary_faces = 1
+    if( ELASTIC_SIMULATION ) then
+      allocate(b_absorb_field(NDIM,NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
+      if( ier /= 0 ) stop 'error allocating array b_absorb_field'
+    endif
+
+    if( ACOUSTIC_SIMULATION ) then
+      allocate(b_absorb_potential(NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
+      if( ier /= 0 ) stop 'error allocating array b_absorb_potential'
+    endif
+  endif
+
+
+  end subroutine prepare_timerun_adjoint
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine prepare_timerun_noise()
+
+! prepares noise simulations
+
+  use specfem_par
+  use specfem_par_acoustic
+  use specfem_par_elastic
+  use specfem_par_poroelastic
+  use specfem_par_movie
+  implicit none
+  ! local parameters
+  integer :: ier
+
+  ! for noise simulations
+  if ( NOISE_TOMOGRAPHY /= 0 ) then
+
+    ! checks if free surface is defined
+    if( num_free_surface_faces == 0 ) then
+       write(*,*) myrank, " doesn't have a free_surface_face"
+       ! stop 'error: noise simulations need a free surface'
+    endif
+
+    ! allocates arrays
+    allocate(noise_sourcearray(NDIM,NGLLX,NGLLY,NGLLZ,NSTEP),stat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error allocating noise source array')
+
+    allocate(normal_x_noise(NGLLSQUARE*num_free_surface_faces),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array normal_x_noise'
+    allocate(normal_y_noise(NGLLSQUARE*num_free_surface_faces),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array normal_y_noise'
+    allocate(normal_z_noise(NGLLSQUARE*num_free_surface_faces),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array normal_z_noise'
+    allocate(mask_noise(NGLLSQUARE*num_free_surface_faces),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array mask_noise'
+    allocate(noise_surface_movie(NDIM,NGLLSQUARE,num_free_surface_faces),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array noise_surface_movie'
+
+    ! initializes
+    noise_sourcearray(:,:,:,:,:) = 0._CUSTOM_REAL
+    normal_x_noise(:)            = 0._CUSTOM_REAL
+    normal_y_noise(:)            = 0._CUSTOM_REAL
+    normal_z_noise(:)            = 0._CUSTOM_REAL
+    mask_noise(:)                = 0._CUSTOM_REAL
+    noise_surface_movie(:,:,:) = 0._CUSTOM_REAL
+
+    ! sets up noise source for master receiver station
+    call read_parameters_noise(myrank,nrec,NSTEP,NGLLSQUARE*num_free_surface_faces, &
+                               islice_selected_rec,xi_receiver,eta_receiver,gamma_receiver,nu, &
+                               noise_sourcearray,xigll,yigll,zigll, &
+                               ibool, &
+                               xstore,ystore,zstore, &
+                               irec_master_noise,normal_x_noise,normal_y_noise,normal_z_noise,mask_noise, &
+                               NSPEC_AB,NGLOB_AB, &
+                               num_free_surface_faces,free_surface_ispec,free_surface_ijk, &
+                               ispec_is_acoustic)
+
+    ! checks flags for noise simulation
+    call check_parameters_noise(myrank,NOISE_TOMOGRAPHY,SIMULATION_TYPE,SAVE_FORWARD, &
+                                LOCAL_PATH, &
+                                num_free_surface_faces,NSTEP)
+  endif
+
+  end subroutine prepare_timerun_noise
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine prepare_timerun_GPU()
+
+  use specfem_par
+  use specfem_par_acoustic
+  use specfem_par_elastic
+  use specfem_par_poroelastic
+  use specfem_par_movie
+
+  implicit none
+  real :: free_mb,used_mb,total_mb
+  integer :: ncuda_devices,ncuda_devices_min,ncuda_devices_max
+
+  ! GPU_MODE now defined in Par_file
+  if(myrank == 0 ) then
+    write(IMAIN,*)
+    write(IMAIN,*) "GPU_MODE Active. Preparing Fields and Constants on Device."
+    write(IMAIN,*)
+  endif
+
+  ! initializes GPU and outputs info to files for all processes
+  call prepare_cuda_device(myrank,ncuda_devices)
+
+  ! collects min/max of local devices found for statistics
+  call sync_all()
+  call min_all_i(ncuda_devices,ncuda_devices_min)
+  call max_all_i(ncuda_devices,ncuda_devices_max)
+
+  ! prepares general fields on GPU
+  call prepare_constants_device(Mesh_pointer, &
+                                  NGLLX, NSPEC_AB, NGLOB_AB, &
+                                  xix, xiy, xiz, etax,etay,etaz, gammax, gammay, gammaz, &
+                                  kappastore, mustore,ibool, &
+                                  num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh, &
+                                  nibool_interfaces_ext_mesh, ibool_interfaces_ext_mesh, &
+                                  hprime_xx, hprime_yy, hprime_zz, &
+                                  hprimewgll_xx, hprimewgll_yy, hprimewgll_zz, &
+                                  wgllwgll_xy, wgllwgll_xz, wgllwgll_yz, &
+                                  ABSORBING_CONDITIONS, &
+                                  abs_boundary_ispec, abs_boundary_ijk, &
+                                  abs_boundary_normal, &
+                                  abs_boundary_jacobian2Dw, &
+                                  num_abs_boundary_faces, &
+                                  ispec_is_inner, &
+                                  NSOURCES, nsources_local, &
+                                  sourcearrays, islice_selected_source, ispec_selected_source, &
+                                  number_receiver_global, ispec_selected_rec, &
+                                  nrec, nrec_local, &
+                                  SIMULATION_TYPE, &
+                                  USE_MESH_COLORING_GPU, &
+                                  nspec_acoustic,nspec_elastic)
+
+
+  ! prepares fields on GPU for acoustic simulations
+  if( ACOUSTIC_SIMULATION ) then
+    call prepare_fields_acoustic_device(Mesh_pointer,rmass_acoustic,rhostore,kappastore, &
+                                  num_phase_ispec_acoustic,phase_ispec_inner_acoustic, &
+                                  ispec_is_acoustic, &
+                                  NOISE_TOMOGRAPHY,num_free_surface_faces, &
+                                  free_surface_ispec,free_surface_ijk, &
+                                  ABSORBING_CONDITIONS,b_reclen_potential,b_absorb_potential, &
+                                  ELASTIC_SIMULATION, num_coupling_ac_el_faces, &
+                                  coupling_ac_el_ispec,coupling_ac_el_ijk, &
+                                  coupling_ac_el_normal,coupling_ac_el_jacobian2Dw, &
+                                  num_colors_outer_acoustic,num_colors_inner_acoustic, &
+                                  num_elem_colors_acoustic)
+
+    if( SIMULATION_TYPE == 3 ) &
+      call prepare_fields_acoustic_adj_dev(Mesh_pointer, &
+                                  SIMULATION_TYPE, &
+                                  APPROXIMATE_HESS_KL)
+
+  endif
+
+  ! prepares fields on GPU for elastic simulations
+  if( ELASTIC_SIMULATION ) then
+    call prepare_fields_elastic_device(Mesh_pointer, NDIM*NGLOB_AB, &
+                                  rmass,rho_vp,rho_vs, &
+                                  num_phase_ispec_elastic,phase_ispec_inner_elastic, &
+                                  ispec_is_elastic, &
+                                  ABSORBING_CONDITIONS,b_absorb_field,b_reclen_field, &
+                                  SIMULATION_TYPE,SAVE_FORWARD, &
+                                  COMPUTE_AND_STORE_STRAIN, &
+                                  epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+                                  epsilondev_xz,epsilondev_yz, &
+                                  ATTENUATION, &
+                                  size(R_xx), &
+                                  R_xx,R_yy,R_xy,R_xz,R_yz, &
+                                  one_minus_sum_beta,factor_common, &
+                                  alphaval,betaval,gammaval, &
+                                  OCEANS,rmass_ocean_load, &
+                                  NOISE_TOMOGRAPHY, &
+                                  free_surface_normal,free_surface_ispec,free_surface_ijk, &
+                                  num_free_surface_faces, &
+                                  ACOUSTIC_SIMULATION, &
+                                  num_colors_outer_elastic,num_colors_inner_elastic, &
+                                  num_elem_colors_elastic, &
+                                  ANISOTROPY, &
+                                  c11store,c12store,c13store,c14store,c15store,c16store, &
+                                  c22store,c23store,c24store,c25store,c26store, &
+                                  c33store,c34store,c35store,c36store, &
+                                  c44store,c45store,c46store,c55store,c56store,c66store)
+
+    if( SIMULATION_TYPE == 3 ) &
+      call prepare_fields_elastic_adj_dev(Mesh_pointer, NDIM*NGLOB_AB, &
+                                  SIMULATION_TYPE, &
+                                  COMPUTE_AND_STORE_STRAIN, &
+                                  epsilon_trace_over_3, &
+                                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+                                  b_epsilondev_xz,b_epsilondev_yz, &
+                                  b_epsilon_trace_over_3, &
+                                  ATTENUATION,size(R_xx), &
+                                  b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+                                  b_alphaval,b_betaval,b_gammaval, &
+                                  APPROXIMATE_HESS_KL)
+
+  endif
+
+  ! prepares needed receiver array for adjoint runs
+  if( SIMULATION_TYPE == 2 .or. SIMULATION_TYPE == 3 ) &
+    call prepare_sim2_or_3_const_device(Mesh_pointer, &
+                                       islice_selected_rec,size(islice_selected_rec), &
+                                       nadj_rec_local,nrec,myrank)
+
+  ! prepares fields on GPU for noise simulations
+  if ( NOISE_TOMOGRAPHY > 0 ) then
+    ! note: noise tomography is only supported for elastic domains so far.
+
+    ! copies noise  arrays to GPU
+    call prepare_fields_noise_device(Mesh_pointer, NSPEC_AB, NGLOB_AB, &
+                                  free_surface_ispec, &
+                                  free_surface_ijk, &
+                                  num_free_surface_faces, &
+                                  SIMULATION_TYPE,NOISE_TOMOGRAPHY, &
+                                  NSTEP,noise_sourcearray, &
+                                  normal_x_noise,normal_y_noise,normal_z_noise, &
+                                  mask_noise,free_surface_jacobian2Dw)
+
+  endif ! NOISE_TOMOGRAPHY
+
+  ! prepares gravity arrays
+  if( GRAVITY ) then
+    call prepare_fields_gravity_device(Mesh_pointer,GRAVITY, &
+                                    minus_deriv_gravity,minus_g,wgll_cube,&
+                                    ACOUSTIC_SIMULATION,rhostore)
+  endif
+
+  ! sends initial data to device
+
+  ! puts acoustic initial fields onto GPU
+  if( ACOUSTIC_SIMULATION ) then
+    call transfer_fields_ac_to_device(NGLOB_AB,potential_acoustic, &
+                          potential_dot_acoustic,potential_dot_dot_acoustic,Mesh_pointer)
+    if( SIMULATION_TYPE == 3 ) &
+      call transfer_b_fields_ac_to_device(NGLOB_AB,b_potential_acoustic, &
+                          b_potential_dot_acoustic,b_potential_dot_dot_acoustic,Mesh_pointer)
+  endif
+
+  ! puts elastic initial fields onto GPU
+  if( ELASTIC_SIMULATION ) then
+    ! transfer forward and backward fields to device with initial values
+    call transfer_fields_el_to_device(NDIM*NGLOB_AB,displ,veloc,accel,Mesh_pointer)
+    if(SIMULATION_TYPE == 3) &
+      call transfer_b_fields_to_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel,Mesh_pointer)
+  endif
+
+  ! outputs GPU usage to files for all processes
+  call output_free_device_memory(myrank)
+
+  ! outputs usage for main process
+  if( myrank == 0 ) then
+    write(IMAIN,*)"  GPU number of devices per node: min =",ncuda_devices_min
+    write(IMAIN,*)"                                  max =",ncuda_devices_max
+    write(IMAIN,*)
+
+    call get_free_device_memory(free_mb,used_mb,total_mb)
+    write(IMAIN,*)"  GPU usage: free  =",free_mb," MB",nint(free_mb/total_mb*100.0),"%"
+    write(IMAIN,*)"             used  =",used_mb," MB",nint(used_mb/total_mb*100.0),"%"
+    write(IMAIN,*)"             total =",total_mb," MB",nint(total_mb/total_mb*100.0),"%"
+    write(IMAIN,*)
+  endif
+
+  end subroutine prepare_timerun_GPU
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+! OpenMP version uses "special" compute_forces_elastic_Dev routine
+! we need to set num_elem_colors_elastic arrays
+
+#ifdef OPENMP_MODE
+  subroutine prepare_timerun_OpenMP()
+
+  use specfem_par
+  use specfem_par_elastic
+  implicit none
+
+  ! local parameters
+  integer :: ier
+  integer :: NUM_THREADS
+  integer :: OMP_GET_MAX_THREADS
+
+  ! OpenMP for elastic simulation only supported yet
+  if( ELASTIC_SIMULATION ) then
+
+    NUM_THREADS = OMP_GET_MAX_THREADS()
+    if( myrank == 0 ) then
+      write(IMAIN,*)
+      write(IMAIN,*) 'Using:',NUM_THREADS, ' OpenMP threads'
+      write(IMAIN,*)
+    endif
+
+    ! allocate cfe_Dev_openmp local arrays for OpenMP version
+    allocate(dummyx_loc(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(dummyy_loc(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(dummyz_loc(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(newtempx1(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(newtempx2(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(newtempx3(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(newtempy1(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(newtempy2(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(newtempy3(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(newtempz1(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(newtempz2(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(newtempz3(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(tempx1(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(tempx2(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(tempx3(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(tempy1(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(tempy2(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(tempy3(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(tempz1(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(tempz2(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(tempz3(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+
+    ! set num_elem_colors array in case no mesh coloring is used
+    if( .not. USE_MESH_COLORING_GPU ) then
+      ! deallocate dummy array
+      if( allocated(num_elem_colors_elastic) ) deallocate(num_elem_colors_elastic)
+
+      ! loads with corresonding values
+      num_colors_outer_elastic = 1
+      num_colors_inner_elastic = 1
+      allocate(num_elem_colors_elastic(num_colors_outer_elastic + num_colors_inner_elastic),stat=ier)
+      if( ier /= 0 ) stop 'error allocating num_elem_colors_elastic array'
+
+      ! sets to all elements in inner/outer phase
+      num_elem_colors_elastic(1) = nspec_outer_elastic
+      num_elem_colors_elastic(2) = nspec_inner_elastic
+    endif
+
+  endif
+
+  end subroutine prepare_timerun_OpenMP
+#endif

Deleted: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90	2012-02-25 01:37:33 UTC (rev 19680)
@@ -1,1028 +0,0 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 0
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and University of Pau / CNRS / INRIA
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-!
-! United States and French Government Sponsorship Acknowledged.
-
-  subroutine prepare_timerun()
-
-  use specfem_par
-  use specfem_par_acoustic
-  use specfem_par_elastic
-  use specfem_par_poroelastic
-  use specfem_par_movie
-
-  implicit none
-  character(len=256) :: plot_file
-  integer :: ier
-
-  ! flag for any movie simulation
-  if( EXTERNAL_MESH_MOVIE_SURFACE .or. EXTERNAL_MESH_CREATE_SHAKEMAP .or. &
-     MOVIE_SURFACE .or. CREATE_SHAKEMAP .or. MOVIE_VOLUME .or. PNM_GIF_IMAGE ) then
-    MOVIE_SIMULATION = .true.
-  else
-    MOVIE_SIMULATION = .false.
-  endif
-
-  ! user info
-  if(myrank == 0) then
-
-    write(IMAIN,*)
-    if(ATTENUATION) then
-      write(IMAIN,*) 'incorporating attenuation using ',N_SLS,' standard linear solids'
-      if(USE_OLSEN_ATTENUATION) then
-        write(IMAIN,*) 'using Olsen''s attenuation'
-      else
-        write(IMAIN,*) 'not using Olsen''s attenuation'
-      endif
-    else
-      write(IMAIN,*) 'no attenuation'
-    endif
-
-    write(IMAIN,*)
-    if(ANISOTROPY) then
-      write(IMAIN,*) 'incorporating anisotropy'
-    else
-      write(IMAIN,*) 'no anisotropy'
-    endif
-
-    write(IMAIN,*)
-    if(OCEANS) then
-      write(IMAIN,*) 'incorporating the oceans using equivalent load'
-    else
-      write(IMAIN,*) 'no oceans'
-    endif
-
-    write(IMAIN,*)
-    if(GRAVITY) then
-      write(IMAIN,*) 'incorporating gravity'
-    else
-      write(IMAIN,*) 'no gravity'
-    endif
-
-    write(IMAIN,*)
-    if(ACOUSTIC_SIMULATION) then
-      write(IMAIN,*) 'incorporating acoustic simulation'
-    else
-      write(IMAIN,*) 'no acoustic simulation'
-    endif
-
-    write(IMAIN,*)
-    if(ELASTIC_SIMULATION) then
-      write(IMAIN,*) 'incorporating elastic simulation'
-    else
-      write(IMAIN,*) 'no elastic simulation'
-    endif
-
-    write(IMAIN,*)
-    if(POROELASTIC_SIMULATION) then
-      write(IMAIN,*) 'incorporating poroelastic simulation'
-    else
-      write(IMAIN,*) 'no poroelastic simulation'
-    endif
-    write(IMAIN,*)
-
-    write(IMAIN,*)
-    if(MOVIE_SIMULATION) then
-      write(IMAIN,*) 'incorporating movie simulation'
-    else
-      write(IMAIN,*) 'no movie simulation'
-    endif
-    write(IMAIN,*)
-
-  endif
-
-  ! synchronize all the processes before assembling the mass matrix
-  ! to make sure all the nodes have finished to read their databases
-  call sync_all()
-
-  ! sets up mass matrices
-  call prepare_timerun_mass_matrices()
-
-  ! initialize acoustic arrays to zero
-  if( ACOUSTIC_SIMULATION ) then
-    potential_acoustic(:) = 0._CUSTOM_REAL
-    potential_dot_acoustic(:) = 0._CUSTOM_REAL
-    potential_dot_dot_acoustic(:) = 0._CUSTOM_REAL
-    ! put negligible initial value to avoid very slow underflow trapping
-    if(FIX_UNDERFLOW_PROBLEM) potential_acoustic(:) = VERYSMALLVAL
-  endif
-
-  ! initialize elastic arrays to zero/verysmallvall
-  if( ELASTIC_SIMULATION ) then
-    displ(:,:) = 0._CUSTOM_REAL
-    veloc(:,:) = 0._CUSTOM_REAL
-    accel(:,:) = 0._CUSTOM_REAL
-    ! put negligible initial value to avoid very slow underflow trapping
-    if(FIX_UNDERFLOW_PROBLEM) displ(:,:) = VERYSMALLVAL
-  endif
-
-
-  ! distinguish between single and double precision for reals
-  if(CUSTOM_REAL == SIZE_REAL) then
-    deltat = sngl(DT)
-  else
-    deltat = DT
-  endif
-  deltatover2 = deltat/2._CUSTOM_REAL
-  deltatsqover2 = deltat*deltat/2._CUSTOM_REAL
-
-  ! seismograms
-  if (nrec_local > 0) then
-    ! allocate seismogram array
-    allocate(seismograms_d(NDIM,nrec_local,NSTEP),stat=ier)
-    if( ier /= 0 ) stop 'error allocating array seismograms_d'
-    allocate(seismograms_v(NDIM,nrec_local,NSTEP),stat=ier)
-    if( ier /= 0 ) stop 'error allocating array seismograms_v'
-    allocate(seismograms_a(NDIM,nrec_local,NSTEP),stat=ier)
-    if( ier /= 0 ) stop 'error allocating array seismograms_a'
-
-    ! initialize seismograms
-    seismograms_d(:,:,:) = 0._CUSTOM_REAL
-    seismograms_v(:,:,:) = 0._CUSTOM_REAL
-    seismograms_a(:,:,:) = 0._CUSTOM_REAL
-  endif
-
-  ! synchronize all the processes
-  call sync_all()
-
-  ! prepares attenuation arrays
-  call prepare_timerun_attenuation()
-
-  ! prepares gravity arrays
-  call prepare_timerun_gravity()
-
-  ! initializes PML arrays
-  if( ABSORBING_CONDITIONS  ) then
-    if (SIMULATION_TYPE /= 1 .and. ABSORB_USE_PML )  then
-      write(IMAIN,*) 'NOTE: adjoint simulations and PML not supported yet...'
-    else
-      if( ABSORB_USE_PML ) then
-        call PML_initialize()
-      endif
-    endif
-  endif
-
-  ! opens source time function file
-  if(PRINT_SOURCE_TIME_FUNCTION .and. myrank == 0) then
-    ! print the source-time function
-    if(NSOURCES == 1) then
-      plot_file = '/plot_source_time_function.txt'
-    else
-     if(NSOURCES < 10) then
-        write(plot_file,"('/plot_source_time_function',i1,'.txt')") NSOURCES
-      else
-        write(plot_file,"('/plot_source_time_function',i2,'.txt')") NSOURCES
-      endif
-    endif
-    open(unit=IOSTF,file=trim(OUTPUT_FILES)//plot_file,status='unknown')
-  endif
-
-  ! user output
-  if(myrank == 0) then
-    write(IMAIN,*)
-    write(IMAIN,*) '           time step: ',sngl(DT),' s'
-    write(IMAIN,*) 'number of time steps: ',NSTEP
-    write(IMAIN,*) 'total simulated time: ',sngl(NSTEP*DT),' seconds'
-    write(IMAIN,*)
-  endif
-
-  ! prepares ADJOINT simulations
-  call prepare_timerun_adjoint()
-
-  ! prepares noise simulations
-  call prepare_timerun_noise()
-
-  ! prepares GPU arrays
-  if(GPU_MODE) call prepare_timerun_GPU()
-
-  end subroutine prepare_timerun
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine prepare_timerun_mass_matrices()
-
-  use specfem_par
-  use specfem_par_acoustic
-  use specfem_par_elastic
-  use specfem_par_poroelastic
-  implicit none
-
-! the mass matrix needs to be assembled with MPI here once and for all
-  if(ACOUSTIC_SIMULATION) then
-    call assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,rmass_acoustic, &
-                        num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
-                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
-                        my_neighbours_ext_mesh)
-
-    ! fill mass matrix with fictitious non-zero values to make sure it can be inverted globally
-    where(rmass_acoustic <= 0._CUSTOM_REAL) rmass_acoustic = 1._CUSTOM_REAL
-    rmass_acoustic(:) = 1._CUSTOM_REAL / rmass_acoustic(:)
-
-  endif ! ACOUSTIC_SIMULATION
-
-  if(ELASTIC_SIMULATION) then
-    call assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,rmass, &
-                        num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
-                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
-                        my_neighbours_ext_mesh)
-
-    ! fill mass matrix with fictitious non-zero values to make sure it can be inverted globally
-    where(rmass <= 0._CUSTOM_REAL) rmass = 1._CUSTOM_REAL
-    rmass(:) = 1._CUSTOM_REAL / rmass(:)
-
-    if(OCEANS ) then
-      if( minval(rmass_ocean_load(:)) <= 0._CUSTOM_REAL) &
-        call exit_MPI(myrank,'negative ocean load mass matrix term')
-      rmass_ocean_load(:) = 1. / rmass_ocean_load(:)
-    endif
-
-  endif ! ELASTIC_SIMULATION
-
-  if(POROELASTIC_SIMULATION) then
-
-    stop 'poroelastic simulation not implemented yet'
-    ! but would be something like this...
-    call assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,rmass_solid_poroelastic, &
-                        num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
-                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
-                        my_neighbours_ext_mesh)
-
-    call assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,rmass_fluid_poroelastic, &
-                        num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
-                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
-                        my_neighbours_ext_mesh)
-
-    ! fills mass matrix with fictitious non-zero values to make sure it can be inverted globally
-    where(rmass_solid_poroelastic <= 0._CUSTOM_REAL) rmass_solid_poroelastic = 1._CUSTOM_REAL
-    where(rmass_fluid_poroelastic <= 0._CUSTOM_REAL) rmass_fluid_poroelastic = 1._CUSTOM_REAL
-    rmass_solid_poroelastic(:) = 1._CUSTOM_REAL / rmass_solid_poroelastic(:)
-    rmass_fluid_poroelastic(:) = 1._CUSTOM_REAL / rmass_fluid_poroelastic(:)
-
-  endif ! POROELASTIC_SIMULATION
-
-  if(myrank == 0) write(IMAIN,*) 'end assembling MPI mass matrix'
-
-
-  end subroutine prepare_timerun_mass_matrices
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine prepare_timerun_attenuation()
-
-  use specfem_par
-  use specfem_par_acoustic
-  use specfem_par_elastic
-  use specfem_par_poroelastic
-  implicit none
-
-  ! local parameters
-  double precision, dimension(N_SLS) :: tau_sigma_dble
-  double precision :: f_c_source
-  double precision :: MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD
-  real(kind=CUSTOM_REAL):: scale_factorl
-  integer :: i,j,k,ispec,ier
-  real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: scale_factor
-
-  ! if attenuation is on, shift shear moduli to center frequency of absorption period band, i.e.
-  ! rescale mu to average (central) frequency for attenuation
-  if(ATTENUATION) then
-
-    ! initializes arrays
-    one_minus_sum_beta(:,:,:,:) = 1._CUSTOM_REAL
-    factor_common(:,:,:,:,:) = 1._CUSTOM_REAL
-
-    allocate( scale_factor(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB),stat=ier)
-    if( ier /= 0 ) call exit_mpi(myrank,'error allocation scale_factor')
-    scale_factor(:,:,:,:) = 1._CUSTOM_REAL
-
-    ! reads in attenuation arrays
-    open(unit=27, file=prname(1:len_trim(prname))//'attenuation.bin', &
-          status='old',action='read',form='unformatted',iostat=ier)
-    if( ier /= 0 ) then
-      print*,'error: could not open ',prname(1:len_trim(prname))//'attenuation.bin'
-      call exit_mpi(myrank,'error opening attenuation.bin file')
-    endif
-    read(27) ispec
-    if( ispec /= NSPEC_ATTENUATION_AB ) then
-      close(27)
-      print*,'error: attenuation file array ',ispec,'should be ',NSPEC_ATTENUATION_AB
-      call exit_mpi(myrank,'error attenuation array dimensions, please recompile and rerun generate_databases')
-    endif
-    read(27) one_minus_sum_beta
-    read(27) factor_common
-    read(27) scale_factor
-    close(27)
-
-
-    ! gets stress relaxation times tau_sigma, i.e.
-    ! precalculates tau_sigma depending on period band (constant for all Q_mu), and
-    ! determines central frequency f_c_source of attenuation period band
-    call get_attenuation_constants(min_resolved_period,tau_sigma_dble,&
-              f_c_source,MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD)
-
-    ! determines alphaval,betaval,gammaval for runge-kutta scheme
-    if(CUSTOM_REAL == SIZE_REAL) then
-      tau_sigma(:) = sngl(tau_sigma_dble(:))
-    else
-      tau_sigma(:) = tau_sigma_dble(:)
-    endif
-    call get_attenuation_memory_values(tau_sigma,deltat,alphaval,betaval,gammaval)
-
-    ! shifts shear moduli
-    do ispec = 1,NSPEC_AB
-
-      ! skips non elastic elements
-      if( ispec_is_elastic(ispec) .eqv. .false. ) cycle
-
-      ! determines attenuation factors for each GLL point
-      do k=1,NGLLZ
-        do j=1,NGLLY
-          do i=1,NGLLX
-
-            ! scales only mu moduli
-            scale_factorl = scale_factor(i,j,k,ispec)
-            mustore(i,j,k,ispec) = mustore(i,j,k,ispec) * scale_factorl
-
-          enddo
-        enddo
-      enddo
-    enddo
-
-    deallocate(scale_factor)
-
-    ! statistics
-    ! user output
-    if( myrank == 0 ) then
-      write(IMAIN,*)
-      write(IMAIN,*) "attenuation: "
-      write(IMAIN,*) "  reference period (s)   : ",sngl(1.0/ATTENUATION_f0_REFERENCE), &
-                    " frequency: ",sngl(ATTENUATION_f0_REFERENCE)
-      write(IMAIN,*) "  period band min/max (s): ",sngl(MIN_ATTENUATION_PERIOD),sngl(MAX_ATTENUATION_PERIOD)
-      write(IMAIN,*) "  central period (s)     : ",sngl(1.0/f_c_source), &
-                    " frequency: ",sngl(f_c_source)
-      write(IMAIN,*)
-    endif
-
-    ! clear memory variables if attenuation
-    ! initialize memory variables for attenuation
-    epsilondev_xx(:,:,:,:) = 0._CUSTOM_REAL
-    epsilondev_yy(:,:,:,:) = 0._CUSTOM_REAL
-    epsilondev_xy(:,:,:,:) = 0._CUSTOM_REAL
-    epsilondev_xz(:,:,:,:) = 0._CUSTOM_REAL
-    epsilondev_yz(:,:,:,:) = 0._CUSTOM_REAL
-
-    R_xx(:,:,:,:,:) = 0._CUSTOM_REAL
-    R_yy(:,:,:,:,:) = 0._CUSTOM_REAL
-    R_xy(:,:,:,:,:) = 0._CUSTOM_REAL
-    R_xz(:,:,:,:,:) = 0._CUSTOM_REAL
-    R_yz(:,:,:,:,:) = 0._CUSTOM_REAL
-
-    if(FIX_UNDERFLOW_PROBLEM) then
-      R_xx(:,:,:,:,:) = VERYSMALLVAL
-      R_yy(:,:,:,:,:) = VERYSMALLVAL
-      R_xy(:,:,:,:,:) = VERYSMALLVAL
-      R_xz(:,:,:,:,:) = VERYSMALLVAL
-      R_yz(:,:,:,:,:) = VERYSMALLVAL
-    endif
-  endif
-
-  end subroutine prepare_timerun_attenuation
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine prepare_timerun_gravity()
-
-! precomputes gravity factors
-
-  use specfem_par
-  use specfem_par_acoustic
-  use specfem_par_elastic
-  use specfem_par_poroelastic
-  implicit none
-
-  ! local parameters
-  double precision RICB,RCMB,RTOPDDOUBLEPRIME, &
-    R80,R220,R400,R600,R670,R771,RMOHO,RMIDDLE_CRUST,ROCEAN
-  double precision :: rspl_gravity(NR),gspl(NR),gspl2(NR)
-  double precision :: radius,g,dg ! radius_km
-  !double precision :: g_cmb_dble,g_icb_dble
-  double precision :: rho,drhodr,vp,vs,Qkappa,Qmu
-  integer :: nspl_gravity !int_radius
-  integer :: i,j,k,iglob,ier
-
-  ! sets up weights needed for integration of gravity
-  do k=1,NGLLZ
-    do j=1,NGLLY
-      do i=1,NGLLX
-        wgll_cube(i,j,k) = sngl( wxgll(i)*wygll(j)*wzgll(k) )
-      enddo
-    enddo
-  enddo
-
-  ! store g, rho and dg/dr=dg using normalized radius in lookup table every 100 m
-  ! get density and velocity from PREM model using dummy doubling flag
-  ! this assumes that the gravity perturbations are small and smooth
-  ! and that we can neglect the 3D model and use PREM every 100 m in all cases
-  ! this is probably a rather reasonable assumption
-  if(GRAVITY) then
-
-    ! allocates gravity arrays
-    allocate( minus_deriv_gravity(NGLOB_AB), &
-             minus_g(NGLOB_AB), stat=ier)
-    if( ier /= 0 ) stop 'error allocating gravity arrays'
-
-    ! sets up spline table
-    call make_gravity(nspl_gravity,rspl_gravity,gspl,gspl2, &
-                          ROCEAN,RMIDDLE_CRUST,RMOHO,R80,R220,R400,R600,R670, &
-                          R771,RTOPDDOUBLEPRIME,RCMB,RICB)
-
-    ! pre-calculates gravity terms for all global points
-    do iglob = 1,NGLOB_AB
-
-      ! normalized radius ( zstore values given in m, negative values for depth)
-      radius = ( R_EARTH + zstore(iglob) ) / R_EARTH
-      call spline_evaluation(rspl_gravity,gspl,gspl2,nspl_gravity,radius,g)
-
-      ! use PREM density profile to calculate gravity (fine for other 1D models)
-      call model_prem_iso(radius,rho,drhodr,vp,vs,Qkappa,Qmu, &
-                        RICB,RCMB,RTOPDDOUBLEPRIME, &
-                        R600,R670,R220,R771,R400,R80,RMOHO,RMIDDLE_CRUST,ROCEAN)
-
-      dg = 4.0d0*rho - 2.0d0*g/radius
-
-      ! re-dimensionalize
-      g = g * R_EARTH*(PI*GRAV*RHOAV) ! in m / s^2 ( should be around 10 m/s^2 )
-      dg = dg * R_EARTH*(PI*GRAV*RHOAV) / R_EARTH ! gradient d/dz g , in 1/s^2
-
-      minus_deriv_gravity(iglob) = - dg
-      minus_g(iglob) = - g ! in negative z-direction
-
-      ! debug
-      !if( iglob == 1 .or. iglob == 1000 .or. iglob == 10000 ) then
-      !  ! re-dimensionalize
-      !  radius = radius * R_EARTH ! in m
-      !  vp = vp * R_EARTH*dsqrt(PI*GRAV*RHOAV)  ! in m / s
-      !  rho = rho  * RHOAV  ! in kg / m^3
-      !  print*,'gravity: radius=',radius,'g=',g,'depth=',radius-R_EARTH
-      !  print*,'vp=',vp,'rho=',rho,'kappa=',(vp**2) * rho
-      !  print*,'minus_g..=',minus_g(iglob)
-      !endif
-    enddo
-
-  else
-
-    ! allocates dummy gravity arrays
-    allocate( minus_deriv_gravity(0), &
-             minus_g(0), stat=ier)
-    if( ier /= 0 ) stop 'error allocating gravity arrays'
-
-  endif
-
-  end subroutine prepare_timerun_gravity
-
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine prepare_timerun_adjoint()
-
-! prepares adjoint simulations
-
-  use specfem_par
-  use specfem_par_acoustic
-  use specfem_par_elastic
-  use specfem_par_poroelastic
-  implicit none
-  ! local parameters
-  integer :: ier
-  integer(kind=8) :: filesize
-
-! seismograms
-  if (nrec_local > 0 .and. SIMULATION_TYPE == 2 ) then
-    ! allocate Frechet derivatives array
-    allocate(Mxx_der(nrec_local),Myy_der(nrec_local), &
-            Mzz_der(nrec_local),Mxy_der(nrec_local), &
-            Mxz_der(nrec_local),Myz_der(nrec_local), &
-            sloc_der(NDIM,nrec_local),stat=ier)
-    if( ier /= 0 ) stop 'error allocating array Mxx_der and following arrays'
-    Mxx_der = 0._CUSTOM_REAL
-    Myy_der = 0._CUSTOM_REAL
-    Mzz_der = 0._CUSTOM_REAL
-    Mxy_der = 0._CUSTOM_REAL
-    Mxz_der = 0._CUSTOM_REAL
-    Myz_der = 0._CUSTOM_REAL
-    sloc_der = 0._CUSTOM_REAL
-
-    allocate(seismograms_eps(NDIM,NDIM,nrec_local,NSTEP),stat=ier)
-    if( ier /= 0 ) stop 'error allocating array seismograms_eps'
-    seismograms_eps(:,:,:,:) = 0._CUSTOM_REAL
-  endif
-
-! timing
-  if (SIMULATION_TYPE == 3) then
-
-    ! backward/reconstructed wavefields: time stepping is in time-reversed sense
-    ! (negative time increments)
-    if(CUSTOM_REAL == SIZE_REAL) then
-      b_deltat = - sngl(DT)
-    else
-      b_deltat = - DT
-    endif
-    b_deltatover2 = b_deltat/2._CUSTOM_REAL
-    b_deltatsqover2 = b_deltat*b_deltat/2._CUSTOM_REAL
-
-  endif
-
-! attenuation backward memories
-  if( ATTENUATION .and. SIMULATION_TYPE == 3 ) then
-
-    ! precompute Runge-Kutta coefficients if attenuation
-    call get_attenuation_memory_values(tau_sigma,b_deltat,b_alphaval,b_betaval,b_gammaval)
-
-  endif
-
-! initializes adjoint kernels and reconstructed/backward wavefields
-  if (SIMULATION_TYPE == 3)  then
-    ! elastic domain
-    if( ELASTIC_SIMULATION ) then
-      rho_kl(:,:,:,:)   = 0._CUSTOM_REAL
-      mu_kl(:,:,:,:)    = 0._CUSTOM_REAL
-      kappa_kl(:,:,:,:) = 0._CUSTOM_REAL
-
-      if ( APPROXIMATE_HESS_KL ) &
-        hess_kl(:,:,:,:)   = 0._CUSTOM_REAL
-
-      ! reconstructed/backward elastic wavefields
-      b_displ = 0._CUSTOM_REAL
-      b_veloc = 0._CUSTOM_REAL
-      b_accel = 0._CUSTOM_REAL
-      if(FIX_UNDERFLOW_PROBLEM) b_displ = VERYSMALLVAL
-
-      ! memory variables if attenuation
-      if( ATTENUATION ) then
-         b_R_xx = 0._CUSTOM_REAL
-         b_R_yy = 0._CUSTOM_REAL
-         b_R_xy = 0._CUSTOM_REAL
-         b_R_xz = 0._CUSTOM_REAL
-         b_R_yz = 0._CUSTOM_REAL
-         b_epsilondev_xx = 0._CUSTOM_REAL
-         b_epsilondev_yy = 0._CUSTOM_REAL
-         b_epsilondev_xy = 0._CUSTOM_REAL
-         b_epsilondev_xz = 0._CUSTOM_REAL
-         b_epsilondev_yz = 0._CUSTOM_REAL
-      endif
-
-    endif
-
-    ! acoustic domain
-    if( ACOUSTIC_SIMULATION ) then
-      rho_ac_kl(:,:,:,:)   = 0._CUSTOM_REAL
-      kappa_ac_kl(:,:,:,:) = 0._CUSTOM_REAL
-
-      if ( APPROXIMATE_HESS_KL ) &
-        hess_ac_kl(:,:,:,:)   = 0._CUSTOM_REAL
-
-      ! reconstructed/backward acoustic potentials
-      b_potential_acoustic = 0._CUSTOM_REAL
-      b_potential_dot_acoustic = 0._CUSTOM_REAL
-      b_potential_dot_dot_acoustic = 0._CUSTOM_REAL
-      if(FIX_UNDERFLOW_PROBLEM) b_potential_acoustic = VERYSMALLVAL
-
-    endif
-  endif
-
-! initialize Moho boundary index
-  if (SAVE_MOHO_MESH .and. SIMULATION_TYPE == 3) then
-    ispec2D_moho_top = 0
-    ispec2D_moho_bot = 0
-  endif
-
-! stacey absorbing fields will be reconstructed for adjoint simulations
-! using snapshot files of wavefields
-  if( ABSORBING_CONDITIONS ) then
-
-    ! opens absorbing wavefield saved/to-be-saved by forward simulations
-    if( num_abs_boundary_faces > 0 .and. (SIMULATION_TYPE == 3 .or. &
-          (SIMULATION_TYPE == 1 .and. SAVE_FORWARD)) ) then
-
-      b_num_abs_boundary_faces = num_abs_boundary_faces
-
-      ! elastic domains
-      if( ELASTIC_SIMULATION) then
-        ! allocates wavefield
-        allocate(b_absorb_field(NDIM,NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
-        if( ier /= 0 ) stop 'error allocating array b_absorb_field'
-
-        ! size of single record
-        b_reclen_field = CUSTOM_REAL * NDIM * NGLLSQUARE * num_abs_boundary_faces
-
-        ! check integer size limit: size of b_reclen_field must fit onto an 4-byte integer
-        if( num_abs_boundary_faces > 2147483647 / (CUSTOM_REAL * NDIM * NGLLSQUARE) ) then
-          print *,'reclen needed exceeds integer 4-byte limit: ',b_reclen_field
-          print *,'  ',CUSTOM_REAL, NDIM, NGLLSQUARE, num_abs_boundary_faces
-          print*,'bit size fortran: ',bit_size(b_reclen_field)
-          call exit_MPI(myrank,"error b_reclen_field integer limit")
-        endif
-
-        ! total file size
-        filesize = b_reclen_field
-        filesize = filesize*NSTEP
-
-        if (SIMULATION_TYPE == 3) then
-          ! opens existing files
-
-          ! uses fortran routines for reading
-          !open(unit=IOABS,file=trim(prname)//'absorb_field.bin',status='old',&
-          !      action='read',form='unformatted',access='direct', &
-          !      recl=b_reclen_field+2*4,iostat=ier )
-          !if( ier /= 0 ) call exit_mpi(myrank,'error opening proc***_absorb_field.bin file')
-          ! uses c routines for faster reading
-          call open_file_abs_r(0,trim(prname)//'absorb_field.bin', &
-                              len_trim(trim(prname)//'absorb_field.bin'), &
-                              filesize)
-
-        else
-          ! opens new file
-          ! uses fortran routines for writing
-          !open(unit=IOABS,file=trim(prname)//'absorb_field.bin',status='unknown',&
-          !      form='unformatted',access='direct',&
-          !      recl=b_reclen_field+2*4,iostat=ier )
-          !if( ier /= 0 ) call exit_mpi(myrank,'error opening proc***_absorb_field.bin file')
-          ! uses c routines for faster writing (file index 0 for acoutic domain file)
-          call open_file_abs_w(0,trim(prname)//'absorb_field.bin', &
-                              len_trim(trim(prname)//'absorb_field.bin'), &
-                              filesize)
-
-        endif
-      endif
-
-      ! acoustic domains
-      if( ACOUSTIC_SIMULATION) then
-        ! allocates wavefield
-        allocate(b_absorb_potential(NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
-        if( ier /= 0 ) stop 'error allocating array b_absorb_potential'
-
-        ! size of single record
-        b_reclen_potential = CUSTOM_REAL * NGLLSQUARE * num_abs_boundary_faces
-
-        ! check integer size limit: size of b_reclen_potential must fit onto an 4-byte integer
-        if( num_abs_boundary_faces > 2147483647 / (CUSTOM_REAL * NGLLSQUARE) ) then
-          print *,'reclen needed exceeds integer 4-byte limit: ',b_reclen_potential
-          print *,'  ',CUSTOM_REAL, NGLLSQUARE, num_abs_boundary_faces
-          print*,'bit size fortran: ',bit_size(b_reclen_potential)
-          call exit_MPI(myrank,"error b_reclen_potential integer limit")
-        endif
-
-        ! total file size (two lines to implicitly convert to 8-byte integers)
-        filesize = b_reclen_potential
-        filesize = filesize*NSTEP
-
-        ! debug check size limit
-        !if( NSTEP > 2147483647 / b_reclen_potential ) then
-        !  print *,'file size needed exceeds integer 4-byte limit: ',b_reclen_potential,NSTEP
-        !  print *,'  ',CUSTOM_REAL, NGLLSQUARE, num_abs_boundary_faces,NSTEP
-        !  print*,'file size fortran: ',filesize
-        !  print*,'file bit size fortran: ',bit_size(filesize)
-        !endif
-
-        if (SIMULATION_TYPE == 3) then
-          ! opens existing files
-          ! uses fortran routines for reading
-          !open(unit=IOABS_AC,file=trim(prname)//'absorb_potential.bin',status='old',&
-          !      action='read',form='unformatted',access='direct', &
-          !      recl=b_reclen_potential+2*4,iostat=ier )
-          !if( ier /= 0 ) call exit_mpi(myrank,'error opening proc***_absorb_potential.bin file')
-
-          ! uses c routines for faster reading
-          call open_file_abs_r(1,trim(prname)//'absorb_potential.bin', &
-                              len_trim(trim(prname)//'absorb_potential.bin'), &
-                              filesize)
-
-        else
-          ! opens new file
-          ! uses fortran routines for writing
-          !open(unit=IOABS_AC,file=trim(prname)//'absorb_potential.bin',status='unknown',&
-          !      form='unformatted',access='direct',&
-          !      recl=b_reclen_potential+2*4,iostat=ier )
-          !if( ier /= 0 ) call exit_mpi(myrank,'error opening proc***_absorb_potential.bin file')
-          ! uses c routines for faster writing (file index 1 for acoutic domain file)
-          call open_file_abs_w(1,trim(prname)//'absorb_potential.bin', &
-                              len_trim(trim(prname)//'absorb_potential.bin'), &
-                              filesize)
-
-        endif
-      endif
-    else
-      ! needs dummy array
-      b_num_abs_boundary_faces = 1
-      if( ELASTIC_SIMULATION ) then
-        allocate(b_absorb_field(NDIM,NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
-        if( ier /= 0 ) stop 'error allocating array b_absorb_field'
-      endif
-
-      if( ACOUSTIC_SIMULATION ) then
-        allocate(b_absorb_potential(NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
-        if( ier /= 0 ) stop 'error allocating array b_absorb_potential'
-      endif
-    endif
-  else ! ABSORBING_CONDITIONS
-    ! needs dummy array
-    b_num_abs_boundary_faces = 1
-    if( ELASTIC_SIMULATION ) then
-      allocate(b_absorb_field(NDIM,NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
-      if( ier /= 0 ) stop 'error allocating array b_absorb_field'
-    endif
-
-    if( ACOUSTIC_SIMULATION ) then
-      allocate(b_absorb_potential(NGLLSQUARE,b_num_abs_boundary_faces),stat=ier)
-      if( ier /= 0 ) stop 'error allocating array b_absorb_potential'
-    endif
-  endif
-
-
-  end subroutine prepare_timerun_adjoint
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine prepare_timerun_noise()
-
-! prepares noise simulations
-
-  use specfem_par
-  use specfem_par_acoustic
-  use specfem_par_elastic
-  use specfem_par_poroelastic
-  use specfem_par_movie
-  implicit none
-  ! local parameters
-  integer :: ier
-
-  ! for noise simulations
-  if ( NOISE_TOMOGRAPHY /= 0 ) then
-
-    ! checks if free surface is defined
-    if( num_free_surface_faces == 0 ) then
-       write(*,*) myrank, " doesn't have a free_surface_face"
-       ! stop 'error: noise simulations need a free surface'
-    endif
-
-    ! allocates arrays
-    allocate(noise_sourcearray(NDIM,NGLLX,NGLLY,NGLLZ,NSTEP),stat=ier)
-    if( ier /= 0 ) call exit_mpi(myrank,'error allocating noise source array')
-
-    allocate(normal_x_noise(NGLLSQUARE*num_free_surface_faces),stat=ier)
-    if( ier /= 0 ) stop 'error allocating array normal_x_noise'
-    allocate(normal_y_noise(NGLLSQUARE*num_free_surface_faces),stat=ier)
-    if( ier /= 0 ) stop 'error allocating array normal_y_noise'
-    allocate(normal_z_noise(NGLLSQUARE*num_free_surface_faces),stat=ier)
-    if( ier /= 0 ) stop 'error allocating array normal_z_noise'
-    allocate(mask_noise(NGLLSQUARE*num_free_surface_faces),stat=ier)
-    if( ier /= 0 ) stop 'error allocating array mask_noise'
-    allocate(noise_surface_movie(NDIM,NGLLSQUARE,num_free_surface_faces),stat=ier)
-    if( ier /= 0 ) stop 'error allocating array noise_surface_movie'
-
-    ! initializes
-    noise_sourcearray(:,:,:,:,:) = 0._CUSTOM_REAL
-    normal_x_noise(:)            = 0._CUSTOM_REAL
-    normal_y_noise(:)            = 0._CUSTOM_REAL
-    normal_z_noise(:)            = 0._CUSTOM_REAL
-    mask_noise(:)                = 0._CUSTOM_REAL
-    noise_surface_movie(:,:,:) = 0._CUSTOM_REAL
-
-    ! sets up noise source for master receiver station
-    call read_parameters_noise(myrank,nrec,NSTEP,NGLLSQUARE*num_free_surface_faces, &
-                               islice_selected_rec,xi_receiver,eta_receiver,gamma_receiver,nu, &
-                               noise_sourcearray,xigll,yigll,zigll, &
-                               ibool, &
-                               xstore,ystore,zstore, &
-                               irec_master_noise,normal_x_noise,normal_y_noise,normal_z_noise,mask_noise, &
-                               NSPEC_AB,NGLOB_AB, &
-                               num_free_surface_faces,free_surface_ispec,free_surface_ijk, &
-                               ispec_is_acoustic)
-
-    ! checks flags for noise simulation
-    call check_parameters_noise(myrank,NOISE_TOMOGRAPHY,SIMULATION_TYPE,SAVE_FORWARD, &
-                                LOCAL_PATH, &
-                                num_free_surface_faces,NSTEP)
-  endif
-
-  end subroutine prepare_timerun_noise
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine prepare_timerun_GPU()
-
-  use specfem_par
-  use specfem_par_acoustic
-  use specfem_par_elastic
-  use specfem_par_poroelastic
-  use specfem_par_movie
-
-  implicit none
-  real :: free_mb,used_mb,total_mb
-  integer :: ncuda_devices,ncuda_devices_min,ncuda_devices_max
-
-  ! GPU_MODE now defined in Par_file
-  if(myrank == 0 ) then
-    write(IMAIN,*)
-    write(IMAIN,*) "GPU_MODE Active. Preparing Fields and Constants on Device."
-    write(IMAIN,*)
-  endif
-
-  ! initializes GPU and outputs info to files for all processes
-  call prepare_cuda_device(myrank,ncuda_devices)
-
-  ! collects min/max of local devices found for statistics
-  call sync_all()
-  call min_all_i(ncuda_devices,ncuda_devices_min)
-  call max_all_i(ncuda_devices,ncuda_devices_max)
-
-  ! prepares general fields on GPU
-  call prepare_constants_device(Mesh_pointer, &
-                                  NGLLX, NSPEC_AB, NGLOB_AB, &
-                                  xix, xiy, xiz, etax,etay,etaz, gammax, gammay, gammaz, &
-                                  kappastore, mustore,ibool, &
-                                  num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh, &
-                                  nibool_interfaces_ext_mesh, ibool_interfaces_ext_mesh, &
-                                  hprime_xx, hprime_yy, hprime_zz, &
-                                  hprimewgll_xx, hprimewgll_yy, hprimewgll_zz, &
-                                  wgllwgll_xy, wgllwgll_xz, wgllwgll_yz, &
-                                  ABSORBING_CONDITIONS, &
-                                  abs_boundary_ispec, abs_boundary_ijk, &
-                                  abs_boundary_normal, &
-                                  abs_boundary_jacobian2Dw, &
-                                  num_abs_boundary_faces, &
-                                  ispec_is_inner, &
-                                  NSOURCES, nsources_local, &
-                                  sourcearrays, islice_selected_source, ispec_selected_source, &
-                                  number_receiver_global, ispec_selected_rec, &
-                                  nrec, nrec_local, &
-                                  SIMULATION_TYPE, &
-                                  USE_MESH_COLORING_GPU, &
-                                  nspec_acoustic,nspec_elastic)
-
-
-  ! prepares fields on GPU for acoustic simulations
-  if( ACOUSTIC_SIMULATION ) then
-    call prepare_fields_acoustic_device(Mesh_pointer,rmass_acoustic,rhostore,kappastore, &
-                                  num_phase_ispec_acoustic,phase_ispec_inner_acoustic, &
-                                  ispec_is_acoustic, &
-                                  NOISE_TOMOGRAPHY,num_free_surface_faces, &
-                                  free_surface_ispec,free_surface_ijk, &
-                                  ABSORBING_CONDITIONS,b_reclen_potential,b_absorb_potential, &
-                                  ELASTIC_SIMULATION, num_coupling_ac_el_faces, &
-                                  coupling_ac_el_ispec,coupling_ac_el_ijk, &
-                                  coupling_ac_el_normal,coupling_ac_el_jacobian2Dw, &
-                                  num_colors_outer_acoustic,num_colors_inner_acoustic, &
-                                  num_elem_colors_acoustic)
-
-    if( SIMULATION_TYPE == 3 ) &
-      call prepare_fields_acoustic_adj_dev(Mesh_pointer, &
-                                  SIMULATION_TYPE, &
-                                  APPROXIMATE_HESS_KL)
-
-  endif
-
-  ! prepares fields on GPU for elastic simulations
-  if( ELASTIC_SIMULATION ) then
-    call prepare_fields_elastic_device(Mesh_pointer, NDIM*NGLOB_AB, &
-                                  rmass,rho_vp,rho_vs, &
-                                  num_phase_ispec_elastic,phase_ispec_inner_elastic, &
-                                  ispec_is_elastic, &
-                                  ABSORBING_CONDITIONS,b_absorb_field,b_reclen_field, &
-                                  SIMULATION_TYPE,SAVE_FORWARD, &
-                                  COMPUTE_AND_STORE_STRAIN, &
-                                  epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-                                  epsilondev_xz,epsilondev_yz, &
-                                  ATTENUATION, &
-                                  size(R_xx), &
-                                  R_xx,R_yy,R_xy,R_xz,R_yz, &
-                                  one_minus_sum_beta,factor_common, &
-                                  alphaval,betaval,gammaval, &
-                                  OCEANS,rmass_ocean_load, &
-                                  NOISE_TOMOGRAPHY, &
-                                  free_surface_normal,free_surface_ispec,free_surface_ijk, &
-                                  num_free_surface_faces, &
-                                  ACOUSTIC_SIMULATION, &
-                                  num_colors_outer_elastic,num_colors_inner_elastic, &
-                                  num_elem_colors_elastic, &
-                                  ANISOTROPY, &
-                                  c11store,c12store,c13store,c14store,c15store,c16store, &
-                                  c22store,c23store,c24store,c25store,c26store, &
-                                  c33store,c34store,c35store,c36store, &
-                                  c44store,c45store,c46store,c55store,c56store,c66store)
-
-    if( SIMULATION_TYPE == 3 ) &
-      call prepare_fields_elastic_adj_dev(Mesh_pointer, NDIM*NGLOB_AB, &
-                                  SIMULATION_TYPE, &
-                                  COMPUTE_AND_STORE_STRAIN, &
-                                  epsilon_trace_over_3, &
-                                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
-                                  b_epsilondev_xz,b_epsilondev_yz, &
-                                  b_epsilon_trace_over_3, &
-                                  ATTENUATION,size(R_xx), &
-                                  b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
-                                  b_alphaval,b_betaval,b_gammaval, &
-                                  APPROXIMATE_HESS_KL)
-
-  endif
-
-  ! prepares needed receiver array for adjoint runs
-  if( SIMULATION_TYPE == 2 .or. SIMULATION_TYPE == 3 ) &
-    call prepare_sim2_or_3_const_device(Mesh_pointer, &
-                                       islice_selected_rec,size(islice_selected_rec), &
-                                       nadj_rec_local,nrec,myrank)
-
-  ! prepares fields on GPU for noise simulations
-  if ( NOISE_TOMOGRAPHY > 0 ) then
-    ! note: noise tomography is only supported for elastic domains so far.
-
-    ! copies noise  arrays to GPU
-    call prepare_fields_noise_device(Mesh_pointer, NSPEC_AB, NGLOB_AB, &
-                                  free_surface_ispec, &
-                                  free_surface_ijk, &
-                                  num_free_surface_faces, &
-                                  SIMULATION_TYPE,NOISE_TOMOGRAPHY, &
-                                  NSTEP,noise_sourcearray, &
-                                  normal_x_noise,normal_y_noise,normal_z_noise, &
-                                  mask_noise,free_surface_jacobian2Dw)
-
-  endif ! NOISE_TOMOGRAPHY
-
-  ! prepares gravity arrays
-  if( GRAVITY ) then
-    call prepare_fields_gravity_device(Mesh_pointer,GRAVITY, &
-                                    minus_deriv_gravity,minus_g,wgll_cube,&
-                                    ACOUSTIC_SIMULATION,rhostore)
-  endif
-  
-  ! sends initial data to device
-
-  ! puts acoustic initial fields onto GPU
-  if( ACOUSTIC_SIMULATION ) then
-    call transfer_fields_ac_to_device(NGLOB_AB,potential_acoustic, &
-                          potential_dot_acoustic,potential_dot_dot_acoustic,Mesh_pointer)
-    if( SIMULATION_TYPE == 3 ) &
-      call transfer_b_fields_ac_to_device(NGLOB_AB,b_potential_acoustic, &
-                          b_potential_dot_acoustic,b_potential_dot_dot_acoustic,Mesh_pointer)
-  endif
-
-  ! puts elastic initial fields onto GPU
-  if( ELASTIC_SIMULATION ) then
-    ! transfer forward and backward fields to device with initial values
-    call transfer_fields_el_to_device(NDIM*NGLOB_AB,displ,veloc,accel,Mesh_pointer)
-    if(SIMULATION_TYPE == 3) &
-      call transfer_b_fields_to_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel,Mesh_pointer)
-  endif
-
-  ! outputs GPU usage to files for all processes
-  call output_free_device_memory(myrank)
-
-  ! outputs usage for main process
-  if( myrank == 0 ) then
-    write(IMAIN,*)"  GPU number of devices per node: min =",ncuda_devices_min
-    write(IMAIN,*)"                                  max =",ncuda_devices_max
-    write(IMAIN,*)
-
-    call get_free_device_memory(free_mb,used_mb,total_mb)
-    write(IMAIN,*)"  GPU usage: free  =",free_mb," MB",nint(free_mb/total_mb*100.0),"%"
-    write(IMAIN,*)"             used  =",used_mb," MB",nint(used_mb/total_mb*100.0),"%"
-    write(IMAIN,*)"             total =",total_mb," MB",nint(total_mb/total_mb*100.0),"%"
-    write(IMAIN,*)
-  endif
-
-  end subroutine prepare_timerun_GPU

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/read_mesh_databases.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/read_mesh_databases.f90	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/read_mesh_databases.f90	2012-02-25 01:37:33 UTC (rev 19680)
@@ -35,15 +35,7 @@
   implicit none
   real(kind=CUSTOM_REAL):: minl,maxl,min_all,max_all
   integer :: ier,inum
-  integer NUM_THREADS
-  integer OMP_GET_MAX_THREADS
-  
-  NUM_THREADS = OMP_GET_MAX_THREADS()
-  if( myrank == 0 ) then
-    write(IMAIN,*) 'Using:',NUM_THREADS, ' OpenMP threads' 
-  endif
-  
-  
+
 ! start reading the databasesa
 
 ! info about external mesh simulation
@@ -122,30 +114,6 @@
     allocate(accel(NDIM,NGLOB_AB),stat=ier)
     if( ier /= 0 ) stop 'error allocating array accel'
 
-    ! allocate cfe_Dev_openmp local arrays for OpenMP version
-    allocate(dummyx_loc(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
-    allocate(dummyy_loc(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
-    allocate(dummyz_loc(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
-    allocate(newtempx1(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
-    allocate(newtempx2(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
-    allocate(newtempx3(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
-    allocate(newtempy1(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
-    allocate(newtempy2(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
-    allocate(newtempy3(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
-    allocate(newtempz1(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
-    allocate(newtempz2(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
-    allocate(newtempz3(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
-    allocate(tempx1(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
-    allocate(tempx2(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
-    allocate(tempx3(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
-    allocate(tempy1(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
-    allocate(tempy2(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
-    allocate(tempy3(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
-    allocate(tempz1(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
-    allocate(tempz2(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
-    allocate(tempz3(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
-
-    
     allocate(rmass(NGLOB_AB),stat=ier)
     if( ier /= 0 ) stop 'error allocating array rmass'
     allocate(rho_vp(NGLLX,NGLLY,NGLLZ,NSPEC_AB),stat=ier)
@@ -204,7 +172,7 @@
     ! reads mass matrices
     read(27,iostat=ier) rmass
     if( ier /= 0 ) stop 'error reading in array rmass'
-    
+
     if( OCEANS ) then
       ! ocean mass matrix
       allocate(rmass_ocean_load(NGLOB_AB),stat=ier)
@@ -221,24 +189,24 @@
     if( ier /= 0 ) stop 'error reading in array rho_vp'
     read(27,iostat=ier) rho_vs
     if( ier /= 0 ) stop 'error reading in array rho_vs'
-    
+
     ! checks if rhostore is available for gravity
     if( GRAVITY ) then
-    
+
       if( .not. ACOUSTIC_SIMULATION ) then
         ! rho array needed for gravity
         allocate(rhostore(NGLLX,NGLLY,NGLLZ,NSPEC_AB),stat=ier)
         if( ier /= 0 ) stop 'error allocating array rhostore'
-        
+
         ! extract rho information from mu = rho * vs * vs and rho_vs = rho * vs
         rhostore = 0.0_CUSTOM_REAL
-        where( mustore > TINYVAL ) 
+        where( mustore > TINYVAL )
           rhostore = (rho_vs*rho_vs) / mustore
         endwhere
 
         ! note: the construct below leads to a segmentation fault (ifort v11.1). not sure why...
         !          (where statement - standard fortran 95)
-        !where( mustore > TINYVAL ) 
+        !where( mustore > TINYVAL )
         !  rhostore = (rho_vs*rho_vs) / mustore
         !elsewhere
         !  rhostore = 0.0_CUSTOM_REAL

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D_par.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D_par.f90	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D_par.f90	2012-02-25 01:37:33 UTC (rev 19680)
@@ -288,7 +288,7 @@
        dummyx_loc,dummyy_loc,dummyz_loc,newtempx1,newtempx2,newtempx3,&
        newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3,&
        tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
-  
+
 ! mass matrix
   real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmass
 

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_seismograms.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_seismograms.f90	2012-02-24 21:47:00 UTC (rev 19679)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_seismograms.f90	2012-02-25 01:37:33 UTC (rev 19680)
@@ -91,7 +91,8 @@
     hgammar(:) = hgammar_store(irec_local,:)
 
     ! forward simulations
-    if (SIMULATION_TYPE == 1)  then
+    select case( SIMULATION_TYPE )
+    case( 1 )
 
       ! receiver's spectral element
       ispec = ispec_selected_rec(irec)
@@ -132,7 +133,7 @@
       endif ! acoustic
 
     !adjoint simulations
-    else if (SIMULATION_TYPE == 2) then
+    case( 2 )
 
       ! adjoint source is placed at receiver
       ispec = ispec_selected_source(irec)
@@ -208,7 +209,7 @@
       endif ! acoustic
 
     !adjoint simulations
-    else if (SIMULATION_TYPE == 3) then
+    case( 3 )
 
       ispec = ispec_selected_rec(irec)
 
@@ -247,7 +248,7 @@
                         dxd,dyd,dzd,vxd,vyd,vzd,axd,ayd,azd)
       endif ! acoustic
 
-    endif ! SIMULATION_TYPE
+    end select ! SIMULATION_TYPE
 
 ! store North, East and Vertical components
 ! distinguish between single and double precision for reals
@@ -269,12 +270,9 @@
 ! write the current or final seismograms
   if((mod(it,NTSTEP_BETWEEN_OUTPUT_SEISMOS) == 0 .or. it == NSTEP) .and. (.not.SU_FORMAT)) then
     if (SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) then
-      call write_seismograms_to_file(myrank,seismograms_d,number_receiver_global,station_name, &
-            network_name,nrec,nrec_local,it,DT,NSTEP,t0,LOCAL_PATH,1,SIMULATION_TYPE)
-      call write_seismograms_to_file(myrank,seismograms_v,number_receiver_global,station_name, &
-            network_name,nrec,nrec_local,it,DT,NSTEP,t0,LOCAL_PATH,2,SIMULATION_TYPE)
-      call write_seismograms_to_file(myrank,seismograms_a,number_receiver_global,station_name, &
-            network_name,nrec,nrec_local,it,DT,NSTEP,t0,LOCAL_PATH,3,SIMULATION_TYPE)
+      call write_seismograms_to_file(seismograms_d,1)
+      call write_seismograms_to_file(seismograms_v,2)
+      call write_seismograms_to_file(seismograms_a,3)
     else
       call write_adj_seismograms_to_file(myrank,seismograms_d,number_receiver_global, &
             nrec_local,it,DT,NSTEP,t0,LOCAL_PATH,1)
@@ -294,28 +292,19 @@
 
 ! write seismograms to text files
 
-  subroutine write_seismograms_to_file(myrank,seismograms,number_receiver_global, &
-               station_name,network_name,nrec,nrec_local, &
-               it,DT,NSTEP,t0,LOCAL_PATH,istore,SIMULATION_TYPE)
+  subroutine write_seismograms_to_file(seismograms,istore)
 
+  use constants
+  use specfem_par,only: &
+          myrank,number_receiver_global,station_name,network_name, &
+          nrec,nrec_local,islice_selected_rec, &
+          it,DT,NSTEP,t0,LOCAL_PATH,SIMULATION_TYPE
+
   implicit none
 
-  include "constants.h"
-
-  integer :: NSTEP,it
-  integer :: nrec,nrec_local
-  integer :: myrank,istore
-  integer :: SIMULATION_TYPE
-
-  integer, dimension(nrec_local) :: number_receiver_global
+  integer :: istore
   real(kind=CUSTOM_REAL), dimension(NDIM,nrec_local,NSTEP) :: seismograms
 
-  double precision t0,DT
-
-  character(len=256) LOCAL_PATH
-  character(len=MAX_LENGTH_STATION_NAME), dimension(nrec) :: station_name
-  character(len=MAX_LENGTH_NETWORK_NAME), dimension(nrec) :: network_name
-
   ! local parameters
   integer irec,irec_local
   integer irecord
@@ -327,6 +316,7 @@
   integer :: nrec_local_received,NPROCTOT,total_seismos,receiver,sender
   integer :: iproc,ier
   integer,dimension(1) :: tmp_nrec_local_received,tmp_irec,tmp_nrec_local
+  integer,dimension(:),allocatable:: islice_num_rec_local
 
   ! saves displacement, velocity or acceleration
   if(istore == 1) then
@@ -374,8 +364,22 @@
 
       ! loop on all the slices
       call world_size(NPROCTOT)
+
+      ! counts number of local receivers for each slice
+      allocate(islice_num_rec_local(0:NPROCTOT-1),stat=ier)
+      if( ier /= 0 ) call exit_mpi(myrank,'error allocating islice_num_rec_local')
+      islice_num_rec_local(:) = 0
+      do irec = 1,nrec
+        iproc = islice_selected_rec(irec)
+        islice_num_rec_local(iproc) = islice_num_rec_local(iproc) + 1
+      enddo
+
+      ! loops on all the slices
       do iproc = 0,NPROCTOT-1
 
+        ! communicate only with processes which contain local receivers
+        if( islice_num_rec_local(iproc) == 0 ) cycle
+
         ! receive except from proc 0, which is me and therefore I already have this value
         sender = iproc
         if(iproc /= 0) then
@@ -415,6 +419,7 @@
           enddo ! nrec_local_received
         endif ! if(nrec_local_received > 0 )
       enddo ! NPROCTOT-1
+      deallocate(islice_num_rec_local)
 
       write(IMAIN,*) 'Component: .sem'//component
       write(IMAIN,*) '  total number of receivers saved is ',total_seismos,' out of ',nrec

Added: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/create_specfem3D_gpu_cuda_method_stubs.pl
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/create_specfem3D_gpu_cuda_method_stubs.pl	                        (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/create_specfem3D_gpu_cuda_method_stubs.pl	2012-02-25 01:37:33 UTC (rev 19680)
@@ -0,0 +1,141 @@
+#!/usr/bin/perl
+
+#
+#  Script to extract the function declarations in cuda files
+#
+#
+# usage: ./ceate_specfem3D_gpu_cuda_method_stubs.pl 
+#             run in directory root SPECFEM3D/
+#
+
+$outfile = "src/cuda/specfem3D_gpu_cuda_method_stubs.c";
+
+
+open(IOUT,"> _____temp_tutu_____");
+
+$header = <<END;
+/* 
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "config.h"
+
+typedef float realw;
+
+END
+
+
+$warning = <<END;
+ fprintf(stderr,"ERROR: GPU_MODE enabled without GPU/CUDA Support. To enable GPU support, reconfigure with --with-cuda flag.\\n");
+ exit(1);
+END
+
+print IOUT "$header \n";
+
+$success = 0;
+
+ at objects = `ls src/cuda/*.cu`;
+
+foreach $name (@objects) {  
+  chop $name;
+  print "extracting word in file $name ...\n";
+
+  print IOUT "\n//\n// $name\n//\n\n";  
+  
+  # change tabs to white spaces
+  system("expand -2 < $name > _____temp_tutu01_____");  
+  open(IIN,"<_____temp_tutu01_____");
+
+  
+  # open the source file
+  $success = 1;
+  $do_extract = 0;
+  while($line = <IIN>) {
+    chop $line;
+    
+    # suppress trailing white spaces and carriage return
+    $line =~ s/\s*$//;
+    
+    # change the version number and copyright information
+    #    $line =~ s#\(c\) California Institute of Technology and University of Pau, October 2007#\(c\) California Institute of Technology and University of Pau, November 2007#og;
+    #    $line =~ s#rmass_sigma#rmass_time_integral_of_sigma#og;
+    
+    if($line =~ /extern "C"/){
+      # new function declaration starts  
+      #print "$line\n";
+      if( $line =~/FC_FUNC/ ){ 
+        # function declaration on same line as extern, ask for line skip
+        print "problem: please add a line break after extern 'C' here:";
+        print "$line\n";
+        $success = 0;
+        close(IIN);  
+        exit;
+      }
+      $do_extract = 1;
+      next;          
+    }
+    
+    # extract section
+    if($do_extract == 1 ){
+      # function declaration
+      if($line =~ /{/){
+        # function declaration ends
+        if( $line =~ /PREPARE_CUDA_DEVICE/ ){
+          # adds warning
+          print IOUT "$line \n$warning\} \n\n";
+        }else{
+          print IOUT "$line\} \n\n";
+        }
+        $do_extract = 0;
+      }else{
+        # write line to the output file
+        print IOUT "$line\n";  
+      }
+      next;
+    }
+  }
+  close(IIN);  
+
+  if( $success == 0 ){ exit; }
+}
+
+close(IOUT);
+system("rm -f _____temp_tutu01_____");
+
+# creates new stubs file if successful
+if( $success == 1 ){
+  print "\n\nsuccessfully extracted declarations \n\n";
+  system("cp -p $outfile $outfile.bak");
+  system("cp -p _____temp_tutu_____ $outfile");
+  print "created new: $outfile \n";
+}
+system("rm -f _____temp_tutu_____");
+
+


Property changes on: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/utils/create_specfem3D_gpu_cuda_method_stubs.pl
___________________________________________________________________
Name: svn:executable
   + *



More information about the CIG-COMMITS mailing list