[cig-commits] r22971 - in seismo/3D/SPECFEM3D_GLOBE/trunk: . setup src/cuda src/meshfem3D src/shared src/specfem3D
danielpeter at geodynamics.org
danielpeter at geodynamics.org
Wed Oct 23 07:27:26 PDT 2013
Author: danielpeter
Date: 2013-10-23 07:27:26 -0700 (Wed, 23 Oct 2013)
New Revision: 22971
Modified:
seismo/3D/SPECFEM3D_GLOBE/trunk/configure
seismo/3D/SPECFEM3D_GLOBE/trunk/setup/config.fh.in
seismo/3D/SPECFEM3D_GLOBE/trunk/src/cuda/prepare_mesh_constants_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/trunk/src/cuda/specfem3D_gpu_cuda_method_stubs.c
seismo/3D/SPECFEM3D_GLOBE/trunk/src/meshfem3D/model_ppm.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/meshfem3D/model_s40rts.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/shared/adios_helpers_definitions.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/shared/adios_method_stubs.c
seismo/3D/SPECFEM3D_GLOBE/trunk/src/shared/broadcast_computed_parameters.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/shared/intgrl.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/shared/param_reader.c
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_element.F90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_element_att_memory.F90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_element_strain.F90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_noDev.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_noDev.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core_Dev.F90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/setup_GLL_points.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/setup_sources_receivers.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/specfem3D_par.F90
Log:
updates vectorization routines to reduce code redundancy
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/configure
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/configure 2013-10-23 13:03:00 UTC (rev 22970)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/configure 2013-10-23 14:27:26 UTC (rev 22971)
@@ -1381,7 +1381,7 @@
solver in double precision [default=no]
--enable-vtk build VTK enabled version [default=no]
--enable-vectorization build FORCE_VECTORIZATION enabled version
- [default=yes]
+ [default=no]
Optional Packages:
--with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
@@ -2781,8 +2781,6 @@
if test "${enable_vectorization+set}" = set; then :
enableval=$enable_vectorization; want_vec="$enableval"
else
-# want_vec=yes
-## DK DK changed the default to no_vectorization
want_vec=no
fi
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/setup/config.fh.in
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/setup/config.fh.in 2013-10-23 13:03:00 UTC (rev 22970)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/setup/config.fh.in 2013-10-23 14:27:26 UTC (rev 22971)
@@ -1,3 +1,9 @@
+!-----------------------------------------------------------------------
+!
+! ADIOS
+!
+!-----------------------------------------------------------------------
+
! for ADIOS header informations
! configuration flags to be saved in the adios output file.
@@ -17,3 +23,32 @@
#else
#define STRINGIFY_VAR(a) "a", a
#endif
+
+!-----------------------------------------------------------------------
+!
+! Force vectorization
+!
+!-----------------------------------------------------------------------
+
+! macros for vectorization
+
+! switches indexing between: array( i,j,k .. ) <-> array( ijk,1,1 .. )
+#ifdef FORCE_VECTORIZATION
+# define INDEX_IJK ijk,1,1
+#else
+# define INDEX_IJK i,j,k
+#endif
+
+! switches do-loops between: do k=1,NGLLZ; do j=1,NGLLY; do i=1,NGLLX <-> do ijk=1,NGLLCUBE
+#ifdef FORCE_VECTORIZATION
+# define DO_LOOP_IJK do ijk=1,NGLLCUBE
+#else
+# define DO_LOOP_IJK do k=1,NGLLZ; do j=1,NGLLY; do i=1,NGLLX
+#endif
+
+! switches enddo-loops between: enddo; enddo; enddo ! NGLLZ,NGLLY,NGLLX <-> enddo ! NGLLCUBE
+#ifdef FORCE_VECTORIZATION
+# define ENDDO_LOOP_IJK enddo ! NGLLCUBE
+#else
+# define ENDDO_LOOP_IJK enddo; enddo; enddo ! NGLLZ,NGLLY,NGLLX
+#endif
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/cuda/prepare_mesh_constants_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/cuda/prepare_mesh_constants_cuda.cu 2013-10-23 13:03:00 UTC (rev 22970)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/cuda/prepare_mesh_constants_cuda.cu 2013-10-23 14:27:26 UTC (rev 22971)
@@ -1863,7 +1863,7 @@
print_CUDA_error_if_any(cudaGetTextureReference(&d_b_accel_ic_tex_ref_ptr, "d_b_accel_ic_tex"), 6023);
print_CUDA_error_if_any(cudaBindTexture(0, d_b_accel_ic_tex_ref_ptr, mp->d_b_accel_inner_core,
&channelDesc, sizeof(realw)*size), 6023);
-
+
}
#else
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/cuda/specfem3D_gpu_cuda_method_stubs.c
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/cuda/specfem3D_gpu_cuda_method_stubs.c 2013-10-23 13:03:00 UTC (rev 22970)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/cuda/specfem3D_gpu_cuda_method_stubs.c 2013-10-23 14:27:26 UTC (rev 22971)
@@ -1,4 +1,4 @@
-/*
+/*
!=====================================================================
!
! S p e c f e m 3 D G l o b e V e r s i o n 6 . 0
@@ -34,8 +34,8 @@
typedef float realw;
-
+
//
// src/cuda/assemble_MPI_scalar_cuda.cu
//
@@ -43,12 +43,12 @@
void FC_FUNC_(transfer_boun_pot_from_device,
TRANSFER_BOUN_POT_FROM_DEVICE)(long* Mesh_pointer_f,
realw* send_potential_dot_dot_buffer,
- int* FORWARD_OR_ADJOINT){}
+ int* FORWARD_OR_ADJOINT){}
void FC_FUNC_(transfer_asmbl_pot_to_device,
TRANSFER_ASMBL_POT_TO_DEVICE)(long* Mesh_pointer,
realw* buffer_recv_scalar,
- int* FORWARD_OR_ADJOINT) {}
+ int* FORWARD_OR_ADJOINT) {}
//
@@ -59,13 +59,13 @@
TRANSFER_BOUN_ACCEL_FROM_DEVICE)(long* Mesh_pointer_f,
realw* send_accel_buffer,
int* IREGION,
- int* FORWARD_OR_ADJOINT){}
+ int* FORWARD_OR_ADJOINT){}
void FC_FUNC_(transfer_asmbl_accel_to_device,
TRANSFER_ASMBL_ACCEL_TO_DEVICE)(long* Mesh_pointer,
realw* buffer_recv_vector,
int* IREGION,
- int* FORWARD_OR_ADJOINT) {}
+ int* FORWARD_OR_ADJOINT) {}
//
@@ -73,58 +73,58 @@
//
void FC_FUNC_(pause_for_debug,
- PAUSE_FOR_DEBUG)() {}
+ PAUSE_FOR_DEBUG)() {}
void FC_FUNC_(output_free_device_memory,
- OUTPUT_FREE_DEVICE_MEMORY)(int* myrank) {}
+ OUTPUT_FREE_DEVICE_MEMORY)(int* myrank) {}
void FC_FUNC_(get_free_device_memory,
- get_FREE_DEVICE_MEMORY)(realw* free, realw* used, realw* total ) {}
+ get_FREE_DEVICE_MEMORY)(realw* free, realw* used, realw* total ) {}
void FC_FUNC_(check_norm_acoustic_from_device,
CHECK_NORM_ACOUSTIC_FROM_DEVICE)(realw* norm,
long* Mesh_pointer_f,
- int* FORWARD_OR_ADJOINT) {}
+ int* FORWARD_OR_ADJOINT) {}
void FC_FUNC_(check_norm_elastic_from_device,
CHECK_NORM_ELASTIC_FROM_DEVICE)(realw* norm,
long* Mesh_pointer_f,
- int* FORWARD_OR_ADJOINT) {}
+ int* FORWARD_OR_ADJOINT) {}
void FC_FUNC_(check_norm_strain_from_device,
CHECK_NORM_STRAIN_FROM_DEVICE)(realw* strain_norm,
realw* strain_norm2,
- long* Mesh_pointer_f) {}
+ long* Mesh_pointer_f) {}
void FC_FUNC_(check_max_norm_displ_gpu,
- CHECK_MAX_NORM_DISPL_GPU)(int* size, realw* displ,long* Mesh_pointer_f,int* announceID) {}
+ CHECK_MAX_NORM_DISPL_GPU)(int* size, realw* displ,long* Mesh_pointer_f,int* announceID) {}
void FC_FUNC_(check_max_norm_vector,
- CHECK_MAX_NORM_VECTOR)(int* size, realw* vector1, int* announceID) {}
+ CHECK_MAX_NORM_VECTOR)(int* size, realw* vector1, int* announceID) {}
void FC_FUNC_(check_max_norm_displ,
- CHECK_MAX_NORM_DISPL)(int* size, realw* displ, int* announceID) {}
+ CHECK_MAX_NORM_DISPL)(int* size, realw* displ, int* announceID) {}
void FC_FUNC_(check_max_norm_b_displ_gpu,
- CHECK_MAX_NORM_B_DISPL_GPU)(int* size, realw* b_displ,long* Mesh_pointer_f,int* announceID) {}
+ CHECK_MAX_NORM_B_DISPL_GPU)(int* size, realw* b_displ,long* Mesh_pointer_f,int* announceID) {}
void FC_FUNC_(check_max_norm_b_accel_gpu,
- CHECK_MAX_NORM_B_ACCEL_GPU)(int* size, realw* b_accel,long* Mesh_pointer_f,int* announceID) {}
+ CHECK_MAX_NORM_B_ACCEL_GPU)(int* size, realw* b_accel,long* Mesh_pointer_f,int* announceID) {}
void FC_FUNC_(check_max_norm_b_veloc_gpu,
- CHECK_MAX_NORM_B_VELOC_GPU)(int* size, realw* b_veloc,long* Mesh_pointer_f,int* announceID) {}
+ CHECK_MAX_NORM_B_VELOC_GPU)(int* size, realw* b_veloc,long* Mesh_pointer_f,int* announceID) {}
void FC_FUNC_(check_max_norm_b_displ,
- CHECK_MAX_NORM_B_DISPL)(int* size, realw* b_displ,int* announceID) {}
+ CHECK_MAX_NORM_B_DISPL)(int* size, realw* b_displ,int* announceID) {}
void FC_FUNC_(check_max_norm_b_accel,
- CHECK_MAX_NORM_B_ACCEL)(int* size, realw* b_accel,int* announceID) {}
+ CHECK_MAX_NORM_B_ACCEL)(int* size, realw* b_accel,int* announceID) {}
void FC_FUNC_(check_error_vectors,
- CHECK_ERROR_VECTORS)(int* sizef, realw* vector1,realw* vector2) {}
+ CHECK_ERROR_VECTORS)(int* sizef, realw* vector1,realw* vector2) {}
void FC_FUNC_(get_max_accel,
- GET_MAX_ACCEL)(int* itf,int* sizef,long* Mesh_pointer) {}
+ GET_MAX_ACCEL)(int* itf,int* sizef,long* Mesh_pointer) {}
//
@@ -134,12 +134,12 @@
void FC_FUNC_(compute_add_sources_cuda,
COMPUTE_ADD_SOURCES_CUDA)(long* Mesh_pointer_f,
int* NSOURCESf,
- double* h_stf_pre_compute) {}
+ double* h_stf_pre_compute) {}
void FC_FUNC_(compute_add_sources_backward_cuda,
COMPUTE_ADD_SOURCES_BACKWARD_CUDA)(long* Mesh_pointer_f,
int* NSOURCESf,
- double* h_stf_pre_compute) {}
+ double* h_stf_pre_compute) {}
void FC_FUNC_(compute_add_sources_adjoint_cuda,
COMPUTE_ADD_SOURCES_ADJOINT_CUDA)(long* Mesh_pointer,
@@ -147,7 +147,7 @@
realw* h_adj_sourcearrays,
int* h_islice_selected_rec,
int* h_ispec_selected_rec,
- int* time_index) {}
+ int* time_index) {}
//
@@ -156,23 +156,23 @@
void FC_FUNC_(compute_coupling_fluid_cmb_cuda,
COMPUTE_COUPLING_FLUID_CMB_CUDA)(long* Mesh_pointer_f,
- int* FORWARD_OR_ADJOINT) {}
+ int* FORWARD_OR_ADJOINT) {}
void FC_FUNC_(compute_coupling_fluid_icb_cuda,
COMPUTE_COUPLING_FLUID_ICB_CUDA)(long* Mesh_pointer_f,
- int* FORWARD_OR_ADJOINT) {}
+ int* FORWARD_OR_ADJOINT) {}
void FC_FUNC_(compute_coupling_cmb_fluid_cuda,
COMPUTE_COUPLING_CMB_FLUID_CUDA)(long* Mesh_pointer_f,
- int* FORWARD_OR_ADJOINT) {}
+ int* FORWARD_OR_ADJOINT) {}
void FC_FUNC_(compute_coupling_icb_fluid_cuda,
COMPUTE_COUPLING_ICB_FLUID_CUDA)(long* Mesh_pointer_f,
- int* FORWARD_OR_ADJOINT) {}
+ int* FORWARD_OR_ADJOINT) {}
void FC_FUNC_(compute_coupling_ocean_cuda,
COMPUTE_COUPLING_OCEAN_CUDA)(long* Mesh_pointer_f,
- int* FORWARD_OR_ADJOINT) {}
+ int* FORWARD_OR_ADJOINT) {}
//
@@ -182,7 +182,7 @@
void FC_FUNC_(compute_forces_crust_mantle_cuda,
COMPUTE_FORCES_CRUST_MANTLE_CUDA)(long* Mesh_pointer_f,
int* iphase,
- int* FORWARD_OR_ADJOINT_f) {}
+ int* FORWARD_OR_ADJOINT_f) {}
//
@@ -192,7 +192,7 @@
void FC_FUNC_(compute_forces_inner_core_cuda,
COMPUTE_FORCES_INNER_CORE_CUDA)(long* Mesh_pointer_f,
int* iphase,
- int* FORWARD_OR_ADJOINT_f) {}
+ int* FORWARD_OR_ADJOINT_f) {}
//
@@ -203,7 +203,7 @@
COMPUTE_FORCES_OUTER_CORE_CUDA)(long* Mesh_pointer_f,
int* iphase,
realw* time_f,
- int* FORWARD_OR_ADJOINT_f) {}
+ int* FORWARD_OR_ADJOINT_f) {}
//
@@ -211,22 +211,22 @@
//
void FC_FUNC_(compute_kernels_cm_cuda,
- COMPUTE_KERNELS_CM_CUDA)(long* Mesh_pointer,realw* deltat_f) {}
+ COMPUTE_KERNELS_CM_CUDA)(long* Mesh_pointer,realw* deltat_f) {}
void FC_FUNC_(compute_kernels_ic_cuda,
- COMPUTE_KERNELS_IC_CUDA)(long* Mesh_pointer,realw* deltat_f) {}
+ COMPUTE_KERNELS_IC_CUDA)(long* Mesh_pointer,realw* deltat_f) {}
void FC_FUNC_(compute_kernels_oc_cuda,
- COMPUTE_KERNELS_OC_CUDA)(long* Mesh_pointer,realw* deltat_f) {}
+ COMPUTE_KERNELS_OC_CUDA)(long* Mesh_pointer,realw* deltat_f) {}
void FC_FUNC_(compute_kernels_strgth_noise_cu,
COMPUTE_KERNELS_STRGTH_NOISE_CU)(long* Mesh_pointer,
realw* h_noise_surface_movie,
- realw* deltat_f) {}
+ realw* deltat_f) {}
void FC_FUNC_(compute_kernels_hess_cuda,
COMPUTE_KERNELS_HESS_CUDA)(long* Mesh_pointer,
- realw* deltat_f) {}
+ realw* deltat_f) {}
//
@@ -236,16 +236,16 @@
void FC_FUNC_(compute_stacey_acoustic_cuda,
COMPUTE_STACEY_ACOUSTIC_CUDA)(long* Mesh_pointer_f,
realw* absorb_potential,
- int* itype) {}
+ int* itype) {}
void FC_FUNC_(compute_stacey_acoustic_backward_cuda,
COMPUTE_STACEY_ACOUSTIC_BACKWARD_CUDA)(long* Mesh_pointer_f,
realw* absorb_potential,
- int* itype) {}
+ int* itype) {}
void FC_FUNC_(compute_stacey_acoustic_undoatt_cuda,
COMPUTE_STACEY_ACOUSTIC_UNDOATT_CUDA)(long* Mesh_pointer_f,
- int* itype) {}
+ int* itype) {}
//
@@ -255,16 +255,16 @@
void FC_FUNC_(compute_stacey_elastic_cuda,
COMPUTE_STACEY_ELASTIC_CUDA)(long* Mesh_pointer_f,
realw* absorb_field,
- int* itype) {}
+ int* itype) {}
void FC_FUNC_(compute_stacey_elastic_backward_cuda,
COMPUTE_STACEY_ELASTIC_BACKWARD_CUDA)(long* Mesh_pointer_f,
realw* absorb_field,
- int* itype) {}
+ int* itype) {}
void FC_FUNC_(compute_stacey_elastic_undoatt_cuda,
COMPUTE_STACEY_ELASTIC_UNDOATT_CUDA)(long* Mesh_pointer_f,
- int* itype) {}
+ int* itype) {}
//
@@ -272,39 +272,39 @@
//
void FC_FUNC_(initialize_cuda_device,
- INITIALIZE_CUDA_DEVICE)(int* myrank_f,int* ncuda_devices) {
+ INITIALIZE_CUDA_DEVICE)(int* myrank_f,int* ncuda_devices) {
fprintf(stderr,"ERROR: GPU_MODE enabled without GPU/CUDA Support. To enable GPU support, reconfigure with --with-cuda flag.\n");
exit(1);
-}
+}
//
// src/cuda/noise_tomography_cuda.cu
//
-void FC_FUNC_(fortranflush,FORTRANFLUSH)(int* rank){}
+void FC_FUNC_(fortranflush,FORTRANFLUSH)(int* rank){}
-void FC_FUNC_(fortranprint,FORTRANPRINT)(int* id) {}
+void FC_FUNC_(fortranprint,FORTRANPRINT)(int* id) {}
-void FC_FUNC_(fortranprintf,FORTRANPRINTF)(realw* val) {}
+void FC_FUNC_(fortranprintf,FORTRANPRINTF)(realw* val) {}
-void FC_FUNC_(fortranprintd,FORTRANPRINTD)(double* val) {}
+void FC_FUNC_(fortranprintd,FORTRANPRINTD)(double* val) {}
-void FC_FUNC_(make_displ_rand,MAKE_DISPL_RAND)(long* Mesh_pointer_f,realw* h_displ) {}
+void FC_FUNC_(make_displ_rand,MAKE_DISPL_RAND)(long* Mesh_pointer_f,realw* h_displ) {}
void FC_FUNC_(noise_transfer_surface_to_host,
NOISE_TRANSFER_SURFACE_TO_HOST)(long* Mesh_pointer_f,
- realw* h_noise_surface_movie) {}
+ realw* h_noise_surface_movie) {}
void FC_FUNC_(noise_add_source_master_rec_cu,
NOISE_ADD_SOURCE_MASTER_REC_CU)(long* Mesh_pointer_f,
int* it_f,
int* irec_master_noise_f,
- int* islice_selected_rec) {}
+ int* islice_selected_rec) {}
void FC_FUNC_(noise_add_surface_movie_cuda,
NOISE_ADD_SURFACE_MOVIE_CUDA)(long* Mesh_pointer_f,
- realw* h_noise_surface_movie) {}
+ realw* h_noise_surface_movie) {}
//
@@ -339,7 +339,7 @@
int* SAVE_BOUNDARY_MESH_f,
int* USE_MESH_COLORING_GPU_f,
int* ANISOTROPIC_KL_f,int* APPROXIMATE_HESS_KL_f,
- realw* deltat_f,realw* b_deltat_f) {}
+ realw* deltat_f,realw* b_deltat_f) {}
void FC_FUNC_(prepare_fields_rotation_device,
PREPARE_FIELDS_ROTATION_DEVICE)(long* Mesh_pointer_f,
@@ -349,7 +349,7 @@
realw* b_two_omega_earth_f,
realw* b_A_array_rotation,
realw* b_B_array_rotation,
- int* NSPEC_OUTER_CORE_ROTATION) {}
+ int* NSPEC_OUTER_CORE_ROTATION) {}
void FC_FUNC_(prepare_fields_gravity_device,
PREPARE_FIELDS_gravity_DEVICE)(long* Mesh_pointer_f,
@@ -363,7 +363,7 @@
realw* minus_g_icb,
realw* minus_g_cmb,
double* RHO_BOTTOM_OC,
- double* RHO_TOP_OC) {}
+ double* RHO_TOP_OC) {}
void FC_FUNC_(prepare_fields_attenuat_device,
PREPARE_FIELDS_ATTENUAT_DEVICE)(long* Mesh_pointer_f,
@@ -392,7 +392,7 @@
realw* factor_common_inner_core,
realw* one_minus_sum_beta_inner_core,
realw* alphaval,realw* betaval,realw* gammaval,
- realw* b_alphaval,realw* b_betaval,realw* b_gammaval) {}
+ realw* b_alphaval,realw* b_betaval,realw* b_gammaval) {}
void FC_FUNC_(prepare_fields_strain_device,
PREPARE_FIELDS_STRAIN_DEVICE)(long* Mesh_pointer_f,
@@ -419,7 +419,7 @@
realw* b_epsilondev_xz_inner_core,
realw* b_epsilondev_yz_inner_core,
realw* eps_trace_over_3_inner_core,
- realw* b_eps_trace_over_3_inner_core) {}
+ realw* b_eps_trace_over_3_inner_core) {}
void FC_FUNC_(prepare_fields_absorb_device,
PREPARE_FIELDS_ABSORB_DEVICE)(long* Mesh_pointer_f,
@@ -448,7 +448,7 @@
int* ibelm_ymin_outer_core,int* ibelm_ymax_outer_core,
realw* jacobian2D_xmin_outer_core, realw* jacobian2D_xmax_outer_core,
realw* jacobian2D_ymin_outer_core, realw* jacobian2D_ymax_outer_core,
- realw* vp_outer_core) {}
+ realw* vp_outer_core) {}
void FC_FUNC_(prepare_mpi_buffers_device,
PREPARE_MPI_BUFFERS_DEVICE)(long* Mesh_pointer_f,
@@ -463,7 +463,7 @@
int* num_interfaces_outer_core,
int* max_nibool_interfaces_oc,
int* nibool_interfaces_outer_core,
- int* ibool_interfaces_outer_core){}
+ int* ibool_interfaces_outer_core){}
void FC_FUNC_(prepare_fields_noise_device,
PREPARE_FIELDS_NOISE_DEVICE)(long* Mesh_pointer_f,
@@ -475,14 +475,14 @@
realw* normal_y_noise,
realw* normal_z_noise,
realw* mask_noise,
- realw* jacobian2D_top_crust_mantle) {}
+ realw* jacobian2D_top_crust_mantle) {}
void FC_FUNC_(prepare_oceans_device,
PREPARE_OCEANS_DEVICE)(long* Mesh_pointer_f,
int* npoin_oceans,
int* h_iglob_ocean_load,
realw* h_rmass_ocean_load_selected,
- realw* h_normal_ocean_load) {}
+ realw* h_normal_ocean_load) {}
void FC_FUNC_(prepare_crust_mantle_device,
PREPARE_CRUST_MANTLE_DEVICE)(long* Mesh_pointer_f,
@@ -514,7 +514,7 @@
int* NCHUNKS_VAL,
int* num_colors_outer,
int* num_colors_inner,
- int* num_elem_colors) {}
+ int* num_elem_colors) {}
void FC_FUNC_(prepare_outer_core_device,
PREPARE_OUTER_CORE_DEVICE)(long* Mesh_pointer_f,
@@ -539,7 +539,7 @@
int* h_ibelm_bottom_outer_core,
int* num_colors_outer,
int* num_colors_inner,
- int* num_elem_colors) {}
+ int* num_elem_colors) {}
void FC_FUNC_(prepare_inner_core_device,
PREPARE_INNER_CORE_DEVICE)(long* Mesh_pointer_f,
@@ -562,11 +562,11 @@
int* h_ibelm_top_inner_core,
int* num_colors_outer,
int* num_colors_inner,
- int* num_elem_colors) {}
+ int* num_elem_colors) {}
void FC_FUNC_(prepare_cleanup_device,
PREPARE_CLEANUP_DEVICE)(long* Mesh_pointer_f,
- int* NCHUNKS_VAL) {}
+ int* NCHUNKS_VAL) {}
//
@@ -574,88 +574,88 @@
//
void FC_FUNC_(transfer_fields_cm_to_device,
- TRANSFER_FIELDS_CM_TO_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {}
+ TRANSFER_FIELDS_CM_TO_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_fields_ic_to_device,
- TRANSFER_FIELDS_IC_TO_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {}
+ TRANSFER_FIELDS_IC_TO_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_fields_oc_to_device,
- TRANSFER_FIELDS_OC_TO_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {}
+ TRANSFER_FIELDS_OC_TO_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_b_fields_cm_to_device,
TRANSFER_FIELDS_B_CM_TO_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,
- long* Mesh_pointer_f) {}
+ long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_b_fields_ic_to_device,
TRANSFER_FIELDS_B_IC_TO_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,
- long* Mesh_pointer_f) {}
+ long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_b_fields_oc_to_device,
TRANSFER_FIELDS_B_OC_TO_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,
- long* Mesh_pointer_f) {}
+ long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_fields_cm_from_device,
- TRANSFER_FIELDS_CM_FROM_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {}
+ TRANSFER_FIELDS_CM_FROM_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_fields_ic_from_device,
- TRANSFER_FIELDS_IC_FROM_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {}
+ TRANSFER_FIELDS_IC_FROM_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_fields_oc_from_device,
- TRANSFER_FIELDS_OC_FROM_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {}
+ TRANSFER_FIELDS_OC_FROM_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_b_fields_cm_from_device,
TRANSFER_B_FIELDS_CM_FROM_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,
- long* Mesh_pointer_f) {}
+ long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_b_fields_ic_from_device,
TRANSFER_B_FIELDS_IC_FROM_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,
- long* Mesh_pointer_f) {}
+ long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_b_fields_oc_from_device,
TRANSFER_B_FIELDS_OC_FROM_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,
- long* Mesh_pointer_f) {}
+ long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_displ_cm_from_device,
- TRANSFER_DISPL_CM_FROM_DEVICE)(int* size, realw* displ, long* Mesh_pointer_f) {}
+ TRANSFER_DISPL_CM_FROM_DEVICE)(int* size, realw* displ, long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_b_displ_cm_from_device,
- TRANSFER_B_DISPL_CM_FROM_DEVICE)(int* size, realw* displ, long* Mesh_pointer_f) {}
+ TRANSFER_B_DISPL_CM_FROM_DEVICE)(int* size, realw* displ, long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_displ_ic_from_device,
- TRANSFER_DISPL_IC_FROM_DEVICE)(int* size, realw* displ, long* Mesh_pointer_f) {}
+ TRANSFER_DISPL_IC_FROM_DEVICE)(int* size, realw* displ, long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_b_displ_ic_from_device,
- TRANSFER_B_DISPL_IC_FROM_DEVICE)(int* size, realw* displ, long* Mesh_pointer_f) {}
+ TRANSFER_B_DISPL_IC_FROM_DEVICE)(int* size, realw* displ, long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_displ_oc_from_device,
- TRANSFER_DISPL_OC_FROM_DEVICE)(int* size, realw* displ, long* Mesh_pointer_f) {}
+ TRANSFER_DISPL_OC_FROM_DEVICE)(int* size, realw* displ, long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_b_displ_oc_from_device,
- TRANSFER_B_DISPL_OC_FROM_DEVICE)(int* size, realw* displ, long* Mesh_pointer_f) {}
+ TRANSFER_B_DISPL_OC_FROM_DEVICE)(int* size, realw* displ, long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_veloc_cm_from_device,
- TRANSFER_VELOC_CM_FROM_DEVICE)(int* size, realw* veloc, long* Mesh_pointer_f) {}
+ TRANSFER_VELOC_CM_FROM_DEVICE)(int* size, realw* veloc, long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_veloc_ic_from_device,
- TRANSFER_VELOC_IC_FROM_DEVICE)(int* size, realw* veloc, long* Mesh_pointer_f) {}
+ TRANSFER_VELOC_IC_FROM_DEVICE)(int* size, realw* veloc, long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_veloc_oc_from_device,
- TRANSFER_VELOC_OC_FROM_DEVICE)(int* size, realw* veloc, long* Mesh_pointer_f) {}
+ TRANSFER_VELOC_OC_FROM_DEVICE)(int* size, realw* veloc, long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_accel_cm_to_device,
- TRANSFER_ACCEL_CM_TO_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {}
+ TRANSFER_ACCEL_CM_TO_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_accel_cm_from_device,
- TRANSFER_ACCEL_CM_FROM_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {}
+ TRANSFER_ACCEL_CM_FROM_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_b_accel_cm_from_device,
- TRANSFER_B_ACCEL_CM_FROM_DEVICE)(int* size, realw* b_accel,long* Mesh_pointer_f) {}
+ TRANSFER_B_ACCEL_CM_FROM_DEVICE)(int* size, realw* b_accel,long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_accel_ic_from_device,
- TRANSFER_ACCEL_IC_FROM_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {}
+ TRANSFER_ACCEL_IC_FROM_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_accel_oc_from_device,
- TRANSFER_ACCEL_OC_FROM_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {}
+ TRANSFER_ACCEL_OC_FROM_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {}
void FC_FUNC_(transfer_strain_cm_from_device,
TRANSFER_STRAIN_CM_FROM_DEVICE)(long* Mesh_pointer,
@@ -664,7 +664,7 @@
realw* epsilondev_yy,
realw* epsilondev_xy,
realw* epsilondev_xz,
- realw* epsilondev_yz) {}
+ realw* epsilondev_yz) {}
void FC_FUNC_(transfer_b_strain_cm_to_device,
TRANSFER_B_STRAIN_CM_TO_DEVICE)(long* Mesh_pointer,
@@ -672,7 +672,7 @@
realw* epsilondev_yy,
realw* epsilondev_xy,
realw* epsilondev_xz,
- realw* epsilondev_yz) {}
+ realw* epsilondev_yz) {}
void FC_FUNC_(transfer_strain_ic_from_device,
TRANSFER_STRAIN_IC_FROM_DEVICE)(long* Mesh_pointer,
@@ -681,7 +681,7 @@
realw* epsilondev_yy,
realw* epsilondev_xy,
realw* epsilondev_xz,
- realw* epsilondev_yz) {}
+ realw* epsilondev_yz) {}
void FC_FUNC_(transfer_b_strain_ic_to_device,
TRANSFER_B_STRAIN_IC_TO_DEVICE)(long* Mesh_pointer,
@@ -689,7 +689,7 @@
realw* epsilondev_yy,
realw* epsilondev_xy,
realw* epsilondev_xz,
- realw* epsilondev_yz) {}
+ realw* epsilondev_yz) {}
void FC_FUNC_(transfer_b_rmemory_cm_to_device,
TRANSFER_B_RMEMORY_CM_TO_DEVICE)(long* Mesh_pointer,
@@ -697,7 +697,7 @@
realw* b_R_yy,
realw* b_R_xy,
realw* b_R_xz,
- realw* b_R_yz) {}
+ realw* b_R_yz) {}
void FC_FUNC_(transfer_b_rmemory_ic_to_device,
TRANSFER_B_RMEMORY_IC_TO_DEVICE)(long* Mesh_pointer,
@@ -705,17 +705,17 @@
realw* b_R_yy,
realw* b_R_xy,
realw* b_R_xz,
- realw* b_R_yz) {}
+ realw* b_R_yz) {}
void FC_FUNC_(transfer_rotation_from_device,
TRANSFER_ROTATION_FROM_DEVICE)(long* Mesh_pointer,
realw* A_array_rotation,
- realw* B_array_rotation) {}
+ realw* B_array_rotation) {}
void FC_FUNC_(transfer_b_rotation_to_device,
TRANSFER_B_ROTATION_TO_DEVICE)(long* Mesh_pointer,
realw* A_array_rotation,
- realw* B_array_rotation) {}
+ realw* B_array_rotation) {}
void FC_FUNC_(transfer_kernels_cm_to_host,
TRANSFER_KERNELS_CM_TO_HOST)(long* Mesh_pointer,
@@ -723,30 +723,30 @@
realw* h_alpha_kl,
realw* h_beta_kl,
realw* h_cijkl_kl,
- int* NSPEC) {}
+ int* NSPEC) {}
void FC_FUNC_(transfer_kernels_ic_to_host,
TRANSFER_KERNELS_IC_TO_HOST)(long* Mesh_pointer,
realw* h_rho_kl,
realw* h_alpha_kl,
realw* h_beta_kl,
- int* NSPEC) {}
+ int* NSPEC) {}
void FC_FUNC_(transfer_kernels_oc_to_host,
TRANSFER_KERNELS_OC_TO_HOST)(long* Mesh_pointer,
realw* h_rho_kl,
realw* h_alpha_kl,
- int* NSPEC) {}
+ int* NSPEC) {}
void FC_FUNC_(transfer_kernels_noise_to_host,
TRANSFER_KERNELS_NOISE_TO_HOST)(long* Mesh_pointer,
realw* h_Sigma_kl,
- int* NSPEC) {}
+ int* NSPEC) {}
void FC_FUNC_(transfer_kernels_hess_cm_tohost,
TRANSFER_KERNELS_HESS_CM_TOHOST)(long* Mesh_pointer,
realw* h_hess_kl,
- int* NSPEC) {}
+ int* NSPEC) {}
//
@@ -758,39 +758,39 @@
realw* deltat_f,
realw* deltatsqover2_f,
realw* deltatover2_f,
- int* FORWARD_OR_ADJOINT) {}
+ int* FORWARD_OR_ADJOINT) {}
void FC_FUNC_(update_displacement_cm_cuda,
UPDATE_DISPLACMENT_CM_CUDA)(long* Mesh_pointer_f,
realw* deltat_f,
realw* deltatsqover2_f,
realw* deltatover2_f,
- int* FORWARD_OR_ADJOINT) {}
+ int* FORWARD_OR_ADJOINT) {}
void FC_FUNC_(update_displacement_oc_cuda,
UPDATE_DISPLACEMENT_OC_cuda)(long* Mesh_pointer_f,
realw* deltat_f,
realw* deltatsqover2_f,
realw* deltatover2_f,
- int* FORWARD_OR_ADJOINT) {}
+ int* FORWARD_OR_ADJOINT) {}
void FC_FUNC_(multiply_accel_elastic_cuda,
MULTIPLY_ACCEL_ELASTIC_CUDA)(long* Mesh_pointer,
- int* FORWARD_OR_ADJOINT) {}
+ int* FORWARD_OR_ADJOINT) {}
void FC_FUNC_(update_veloc_elastic_cuda,
UPDATE_VELOC_ELASTIC_CUDA)(long* Mesh_pointer,
realw* deltatover2_f,
- int* FORWARD_OR_ADJOINT) {}
+ int* FORWARD_OR_ADJOINT) {}
void FC_FUNC_(multiply_accel_acoustic_cuda,
MULTIPLY_ACCEL_ACOUSTIC_CUDA)(long* Mesh_pointer,
- int* FORWARD_OR_ADJOINT) {}
+ int* FORWARD_OR_ADJOINT) {}
void FC_FUNC_(update_veloc_acoustic_cuda,
UPDATE_VELOC_ACOUSTIC_CUDA)(long* Mesh_pointer,
realw* deltatover2_f,
- int* FORWARD_OR_ADJOINT) {}
+ int* FORWARD_OR_ADJOINT) {}
//
@@ -810,5 +810,5 @@
int* number_receiver_global,
int* ispec_selected_rec,
int* ispec_selected_source,
- int* ibool) {}
+ int* ibool) {}
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/meshfem3D/model_ppm.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/meshfem3D/model_ppm.f90 2013-10-23 13:03:00 UTC (rev 22970)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/meshfem3D/model_ppm.f90 2013-10-23 14:27:26 UTC (rev 22971)
@@ -110,7 +110,7 @@
call bcast_all_singlei(PPM_num_v)
call bcast_all_singlei(PPM_num_latperlon)
call bcast_all_singlei(PPM_num_lonperdepth)
-
+
if( myrank /= 0 ) then
allocate(PPM_lat(PPM_num_v),PPM_lon(PPM_num_v),PPM_depth(PPM_num_v),PPM_dvs(PPM_num_v))
endif
@@ -119,7 +119,7 @@
call bcast_all_dp(PPM_lat(1:PPM_num_v),PPM_num_v)
call bcast_all_dp(PPM_lon(1:PPM_num_v),PPM_num_v)
call bcast_all_dp(PPM_depth(1:PPM_num_v),PPM_num_v)
-
+
call bcast_all_singledp(PPM_maxlat)
call bcast_all_singledp(PPM_minlat)
call bcast_all_singledp(PPM_maxlon)
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/meshfem3D/model_s40rts.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/meshfem3D/model_s40rts.f90 2013-10-23 13:03:00 UTC (rev 22970)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/meshfem3D/model_s40rts.f90 2013-10-23 14:27:26 UTC (rev 22971)
@@ -119,12 +119,12 @@
call get_value_string(P12, 'model.P12', 'DATA/s20rts/P12.dat') !model P12 is in s20rts data directory
! S40RTS degree 40 S model from Ritsema
- open(unit=10,file=S40RTS,status='old',action='read',iostat=ier)
+ open(unit=10,file=trim(S40RTS),status='old',action='read',iostat=ier)
if ( ier /= 0 ) then
write(IMAIN,*) 'error opening "', trim(S40RTS), '": ', ier
call flush_IMAIN()
! stop
- call exit_MPI(0, 'error model s40rts')
+ call exit_MPI(0, 'error: opening model s40rts data file DATA/s40rts/S40RTS.dat failed, please check...')
endif
do k=0,NK_20
@@ -135,10 +135,10 @@
close(10)
! P12 degree 12 P model from Ritsema
- open(unit=10,file=P12,status='old',action='read',iostat=ier)
+ open(unit=10,file=trim(P12),status='old',action='read',iostat=ier)
if ( ier /= 0 ) then
write(IMAIN,*) 'error opening "', trim(P12), '": ', ier
- call exit_MPI(0, 'error model s40rts')
+ call exit_MPI(0, 'error : opening model s40rts data file DATA/s20rts/P12.dat failed, please check...')
endif
do k=0,NK_20
do l=0,12
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/shared/adios_helpers_definitions.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/shared/adios_helpers_definitions.f90 2013-10-23 13:03:00 UTC (rev 22970)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/shared/adios_helpers_definitions.f90 2013-10-23 14:27:26 UTC (rev 22971)
@@ -1268,7 +1268,7 @@
full_name = trim(path) // trim(array_name)
call define_adios_global_1d_logical_generic(adios_group, group_size_inc, trim(full_name), local_dim)
-
+
end subroutine define_adios_global_1d_logical_5d
end module adios_helpers_definitions_mod
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/shared/adios_method_stubs.c
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/shared/adios_method_stubs.c 2013-10-23 13:03:00 UTC (rev 22970)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/shared/adios_method_stubs.c 2013-10-23 14:27:26 UTC (rev 22971)
@@ -1,4 +1,4 @@
-/*
+/*
!=====================================================================
!
! S p e c f e m 3 D G l o b e V e r s i o n 6 . 0
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/shared/broadcast_computed_parameters.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/shared/broadcast_computed_parameters.f90 2013-10-23 13:03:00 UTC (rev 22970)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/shared/broadcast_computed_parameters.f90 2013-10-23 14:27:26 UTC (rev 22971)
@@ -93,7 +93,7 @@
USE_LDDRK,INCREASE_CFL_FOR_LDDRK, &
ANISOTROPIC_KL,SAVE_TRANSVERSE_KL_ONLY,APPROXIMATE_HESS_KL, &
USE_FULL_TISO_MANTLE,SAVE_SOURCE_MASK, &
- EXACT_MASS_MATRIX_FOR_ROTATION,ATTENUATION_1D_WITH_3D_STORAGE, &
+ EXACT_MASS_MATRIX_FOR_ROTATION,ATTENUATION_1D_WITH_3D_STORAGE, &
GPU_MODE, &
ADIOS_ENABLED,ADIOS_FOR_FORWARD_ARRAYS, &
ADIOS_FOR_MPI_ARRAYS,ADIOS_FOR_ARRAYS_SOLVER, &
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/shared/intgrl.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/shared/intgrl.f90 2013-10-23 13:03:00 UTC (rev 22970)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/shared/intgrl.f90 2013-10-23 14:27:26 UTC (rev 22971)
@@ -33,29 +33,28 @@
implicit none
! Argument variables
- integer ner,nir
- double precision f(640),r(640),s1(640),s2(640)
- double precision s3(640),sum
+ integer :: ner,nir
+ double precision :: f(640),r(640),s1(640),s2(640)
+ double precision :: s3(640),sum
! Local variables
double precision, parameter :: third = 1.0d0/3.0d0
double precision, parameter :: fifth = 1.0d0/5.0d0
double precision, parameter :: sixth = 1.0d0/6.0d0
- double precision rji,yprime(640)
- double precision s1l,s2l,s3l
+ double precision :: rji,yprime(640)
+ double precision :: s1l,s2l,s3l
- integer i,j,n,kdis(28)
- integer ndis,nir1
+ integer :: i,j,n,kdis(28)
+ integer :: ndis,nir1
-
-
data kdis/163,323,336,517,530,540,565,590,609,619,626,633,16*0/
ndis = 12
n = 640
call deriv(f,yprime,n,r,ndis,kdis,s1,s2,s3)
+
nir1 = nir + 1
sum = 0.0d0
do i=nir1,ner
@@ -79,17 +78,17 @@
implicit none
! Argument variables
- integer kdis(28),n,ndis
- double precision r(n),s1(n),s2(n),s3(n)
- double precision y(n),yprime(n)
+ integer :: kdis(28),n,ndis
+ double precision :: r(n),s1(n),s2(n),s3(n)
+ double precision :: y(n),yprime(n)
! Local variables
- integer i,j,j1,j2
- integer k,nd,ndp
- double precision a0,b0,b1
- double precision f(3,1000),h,h2,h2a
- double precision h2b,h3a,ha,s13
- double precision s21,s32,yy(3)
+ integer :: i,j,j1,j2
+ integer :: k,nd,ndp
+ double precision :: a0,b0,b1
+ double precision :: f(3,1000),h,h2,h2a
+ double precision :: h2b,h3a,ha,s13
+ double precision :: s21,s32,yy(3)
yy(1) = 0.d0
yy(2) = 0.d0
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/shared/param_reader.c
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/shared/param_reader.c 2013-10-23 13:03:00 UTC (rev 22970)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/shared/param_reader.c 2013-10-23 14:27:26 UTC (rev 22971)
@@ -1,4 +1,4 @@
-/*
+/*
!=====================================================================
!
! S p e c f e m 3 D G l o b e V e r s i o n 6 . 0
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_element.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_element.F90 2013-10-23 13:03:00 UTC (rev 22970)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_element.F90 2013-10-23 14:27:26 UTC (rev 22971)
@@ -25,17 +25,18 @@
!
!=====================================================================
+! we switch between vectorized and non-vectorized version by using pre-processor flag FORCE_VECTORIZATION
+! and macros INDEX_IJK, DO_LOOP_IJK, ENDDO_LOOP_IJK defined in config.fh
+#include "config.fh"
+
+
!--------------------------------------------------------------------------------------------
!
! isotropic element
!
!--------------------------------------------------------------------------------------------
-#ifndef FORCE_VECTORIZATION
-
- ! default, without vectorization
-
subroutine compute_element_iso(ispec, &
minus_gravity_table,density_table,minus_deriv_gravity_table, &
xstore,ystore,zstore, &
@@ -48,7 +49,7 @@
one_minus_sum_beta,vnspec, &
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
dummyx_loc,dummyy_loc,dummyz_loc, &
- epsilondev_loc,rho_s_H,is_backward_field)
+ epsilondev_loc,rho_s_H)
use constants_solver
use specfem_par,only: COMPUTE_AND_STORE_STRAIN
@@ -98,8 +99,6 @@
real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: rho_s_H
real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
- logical :: is_backward_field
-
! local parameters
real(kind=CUSTOM_REAL) one_minus_sum_beta_use
real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
@@ -122,404 +121,50 @@
double precision factor,sx_l,sy_l,sz_l,gxl,gyl,gzl
double precision Hxxl,Hyyl,Hzzl,Hxyl,Hxzl,Hyzl
- integer :: i,j,k
integer :: int_radius
integer :: iglob
- logical :: dummyl
-
-!daniel debug
- ! dummy to avoid compiler warning
- dummyl = is_backward_field
-
- ! isotropic element
-
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- ! get derivatives of ux, uy and uz with respect to x, y and z
- xixl = xix(i,j,k,ispec)
- xiyl = xiy(i,j,k,ispec)
- xizl = xiz(i,j,k,ispec)
- etaxl = etax(i,j,k,ispec)
- etayl = etay(i,j,k,ispec)
- etazl = etaz(i,j,k,ispec)
- gammaxl = gammax(i,j,k,ispec)
- gammayl = gammay(i,j,k,ispec)
- gammazl = gammaz(i,j,k,ispec)
-
- ! compute the jacobian
- jacobianl = 1.0_CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
- - xiyl*(etaxl*gammazl-etazl*gammaxl) &
- + xizl*(etaxl*gammayl-etayl*gammaxl))
-
- duxdxl = xixl*tempx1(i,j,k) + etaxl*tempx2(i,j,k) + gammaxl*tempx3(i,j,k)
- duxdyl = xiyl*tempx1(i,j,k) + etayl*tempx2(i,j,k) + gammayl*tempx3(i,j,k)
- duxdzl = xizl*tempx1(i,j,k) + etazl*tempx2(i,j,k) + gammazl*tempx3(i,j,k)
-
- duydxl = xixl*tempy1(i,j,k) + etaxl*tempy2(i,j,k) + gammaxl*tempy3(i,j,k)
- duydyl = xiyl*tempy1(i,j,k) + etayl*tempy2(i,j,k) + gammayl*tempy3(i,j,k)
- duydzl = xizl*tempy1(i,j,k) + etazl*tempy2(i,j,k) + gammazl*tempy3(i,j,k)
-
- duzdxl = xixl*tempz1(i,j,k) + etaxl*tempz2(i,j,k) + gammaxl*tempz3(i,j,k)
- duzdyl = xiyl*tempz1(i,j,k) + etayl*tempz2(i,j,k) + gammayl*tempz3(i,j,k)
- duzdzl = xizl*tempz1(i,j,k) + etazl*tempz2(i,j,k) + gammazl*tempz3(i,j,k)
-
- ! precompute some sums to save CPU time
- duxdxl_plus_duydyl = duxdxl + duydyl
- duxdxl_plus_duzdzl = duxdxl + duzdzl
- duydyl_plus_duzdzl = duydyl + duzdzl
- duxdyl_plus_duydxl = duxdyl + duydxl
- duzdxl_plus_duxdzl = duzdxl + duxdzl
- duzdyl_plus_duydzl = duzdyl + duydzl
-
-!ZN beware, here the expression differs from the strain used in memory variable equation (6) in D. Komatitsch and J. Tromp 1999,
-!ZN here Brian Savage uses the engineering strain which are epsilon = 1/2*(grad U + (grad U)^T),
-!ZN where U is the displacement vector and grad the gradient operator, i.e. there is a 1/2 factor difference between the two.
-!ZN Both expressions are fine, but we need to keep in mind that if we put the 1/2 factor here there we need to remove it
-!ZN from the expression in which we use the strain later in the code.
-
- ! compute deviatoric strain
- if (COMPUTE_AND_STORE_STRAIN) then
- templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
- if(NSPEC_CRUST_MANTLE_STRAIN_ONLY == 1) then
- if( ispec == 1) then
- epsilon_trace_over_3(i,j,k,1) = templ
- endif
- else
- epsilon_trace_over_3(i,j,k,ispec) = templ
- endif
- epsilondev_loc(1,i,j,k) = duxdxl - templ
- epsilondev_loc(2,i,j,k) = duydyl - templ
- epsilondev_loc(3,i,j,k) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl
- epsilondev_loc(4,i,j,k) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl
- epsilondev_loc(5,i,j,k) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl
- endif
-
- !
- ! compute isotropic elements
- !
-
- ! layer with no transverse isotropy, use kappav and muv
- kappal = kappavstore(i,j,k,ispec)
- mul = muvstore(i,j,k,ispec)
-
- ! use unrelaxed parameters if attenuation
- if(ATTENUATION_VAL ) then
- ! precompute terms for attenuation if needed
- if( ATTENUATION_3D_VAL .or. ATTENUATION_1D_WITH_3D_STORAGE_VAL ) then
- one_minus_sum_beta_use = one_minus_sum_beta(i,j,k,ispec)
- else
- one_minus_sum_beta_use = one_minus_sum_beta(1,1,1,ispec)
- endif
- mul = mul * one_minus_sum_beta_use
- endif
-
- lambdalplus2mul = kappal + FOUR_THIRDS * mul
- lambdal = lambdalplus2mul - 2.0_CUSTOM_REAL*mul
-
- ! compute stress sigma
- sigma_xx = lambdalplus2mul*duxdxl + lambdal*duydyl_plus_duzdzl
- sigma_yy = lambdalplus2mul*duydyl + lambdal*duxdxl_plus_duzdzl
- sigma_zz = lambdalplus2mul*duzdzl + lambdal*duxdxl_plus_duydyl
-
- sigma_xy = mul*duxdyl_plus_duydxl
- sigma_xz = mul*duzdxl_plus_duxdzl
- sigma_yz = mul*duzdyl_plus_duydzl
-
- ! subtract memory variables if attenuation
- if(ATTENUATION_VAL .and. .not. PARTIAL_PHYS_DISPERSION_ONLY_VAL ) then
-
-!daniel debug: att - debug update
-! call compute_element_att_mem_up_cm(ispec,i,j,k, &
-! R_xx(1,i,j,k,ispec), &
-! R_yy(1,i,j,k,ispec), &
-! R_xy(1,i,j,k,ispec), &
-! R_xz(1,i,j,k,ispec), &
-! R_yz(1,i,j,k,ispec), &
-! epsilondev_loc(:,i,j,k),muvstore(i,j,k,ispec),is_backward_field)
-
- ! note: function inlining is generally done by fortran compilers;
- ! compilers decide based on performance heuristics
- ! note: fortran passes pointers to array location, thus R_memory(1,1,...) should be fine
- call compute_element_att_stress(R_xx(1,i,j,k,ispec), &
- R_yy(1,i,j,k,ispec), &
- R_xy(1,i,j,k,ispec), &
- R_xz(1,i,j,k,ispec), &
- R_yz(1,i,j,k,ispec), &
- sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz)
-
- endif ! ATTENUATION_VAL
-
- ! define symmetric components of sigma for gravity
- sigma_yx = sigma_xy
- sigma_zx = sigma_xz
- sigma_zy = sigma_yz
-
- ! compute non-symmetric terms for gravity
- if(GRAVITY_VAL) then
- ! use mesh coordinates to get theta and phi
- ! x y and z contain r theta and phi
- iglob = ibool(i,j,k,ispec)
-
- dtheta = dble(ystore(iglob))
- dphi = dble(zstore(iglob))
-
- cos_theta = dcos(dtheta)
- sin_theta = dsin(dtheta)
- cos_phi = dcos(dphi)
- sin_phi = dsin(dphi)
-
- cos_theta_sq = cos_theta*cos_theta
- sin_theta_sq = sin_theta*sin_theta
- cos_phi_sq = cos_phi*cos_phi
- sin_phi_sq = sin_phi*sin_phi
-
- ! get g, rho and dg/dr=dg
- ! spherical components of the gravitational acceleration
- ! for efficiency replace with lookup table every 100 m in radial direction
- radius = dble(xstore(iglob))
-
- int_radius = nint(10.d0 * radius * R_EARTH_KM )
- minus_g = minus_gravity_table(int_radius)
- minus_dg = minus_deriv_gravity_table(int_radius)
- rho = density_table(int_radius)
-
- ! Cartesian components of the gravitational acceleration
- gxl = minus_g*sin_theta*cos_phi
- gyl = minus_g*sin_theta*sin_phi
- gzl = minus_g*cos_theta
-
- ! Cartesian components of gradient of gravitational acceleration
- ! obtained from spherical components
- minus_g_over_radius = minus_g / radius
- minus_dg_plus_g_over_radius = minus_dg - minus_g_over_radius
-
- Hxxl = minus_g_over_radius*(cos_phi_sq*cos_theta_sq + sin_phi_sq) + cos_phi_sq*minus_dg*sin_theta_sq
- Hyyl = minus_g_over_radius*(cos_phi_sq + cos_theta_sq*sin_phi_sq) + minus_dg*sin_phi_sq*sin_theta_sq
- Hzzl = cos_theta_sq*minus_dg + minus_g_over_radius*sin_theta_sq
- Hxyl = cos_phi*minus_dg_plus_g_over_radius*sin_phi*sin_theta_sq
- Hxzl = cos_phi*cos_theta*minus_dg_plus_g_over_radius*sin_theta
- Hyzl = cos_theta*minus_dg_plus_g_over_radius*sin_phi*sin_theta
-
- ! distinguish between single and double precision for reals
- if(CUSTOM_REAL == SIZE_REAL) then
-
- ! get displacement and multiply by density to compute G tensor
- sx_l = rho * dble(dummyx_loc(i,j,k)) ! dble(displ_crust_mantle(1,iglob))
- sy_l = rho * dble(dummyy_loc(i,j,k)) ! dble(displ_crust_mantle(2,iglob))
- sz_l = rho * dble(dummyz_loc(i,j,k)) ! dble(displ_crust_mantle(3,iglob))
-
- ! compute G tensor from s . g and add to sigma (not symmetric)
- sigma_xx = sigma_xx + sngl(sy_l*gyl + sz_l*gzl)
- sigma_yy = sigma_yy + sngl(sx_l*gxl + sz_l*gzl)
- sigma_zz = sigma_zz + sngl(sx_l*gxl + sy_l*gyl)
-
- sigma_xy = sigma_xy - sngl(sx_l * gyl)
- sigma_yx = sigma_yx - sngl(sy_l * gxl)
-
- sigma_xz = sigma_xz - sngl(sx_l * gzl)
- sigma_zx = sigma_zx - sngl(sz_l * gxl)
-
- sigma_yz = sigma_yz - sngl(sy_l * gzl)
- sigma_zy = sigma_zy - sngl(sz_l * gyl)
-
- ! precompute vector
- factor = dble(jacobianl) * wgll_cube(i,j,k)
- rho_s_H(1,i,j,k) = sngl(factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl))
- rho_s_H(2,i,j,k) = sngl(factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl))
- rho_s_H(3,i,j,k) = sngl(factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl))
-
- else
-
- ! get displacement and multiply by density to compute G tensor
- sx_l = rho * dummyx_loc(i,j,k) ! displ_crust_mantle(1,iglob)
- sy_l = rho * dummyy_loc(i,j,k) ! displ_crust_mantle(2,iglob)
- sz_l = rho * dummyz_loc(i,j,k) ! displ_crust_mantle(3,iglob)
-
- ! compute G tensor from s . g and add to sigma (not symmetric)
- sigma_xx = sigma_xx + sy_l*gyl + sz_l*gzl
- sigma_yy = sigma_yy + sx_l*gxl + sz_l*gzl
- sigma_zz = sigma_zz + sx_l*gxl + sy_l*gyl
-
- sigma_xy = sigma_xy - sx_l * gyl
- sigma_yx = sigma_yx - sy_l * gxl
-
- sigma_xz = sigma_xz - sx_l * gzl
- sigma_zx = sigma_zx - sz_l * gxl
-
- sigma_yz = sigma_yz - sy_l * gzl
- sigma_zy = sigma_zy - sz_l * gyl
-
- ! precompute vector
- factor = jacobianl * wgll_cube(i,j,k)
- rho_s_H(1,i,j,k) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
- rho_s_H(2,i,j,k) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
- rho_s_H(3,i,j,k) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
-
- endif
-
- endif ! end of section with gravity terms
-
- ! form dot product with test vector, non-symmetric form
- tempx1(i,j,k) = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl) ! this goes to accel_x
- tempy1(i,j,k) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl) ! this goes to accel_y
- tempz1(i,j,k) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl) ! this goes to accel_z
-
- tempx2(i,j,k) = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl) ! this goes to accel_x
- tempy2(i,j,k) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl) ! this goes to accel_y
- tempz2(i,j,k) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl) ! this goes to accel_z
-
- tempx3(i,j,k) = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl) ! this goes to accel_x
- tempy3(i,j,k) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl) ! this goes to accel_y
- tempz3(i,j,k) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl) ! this goes to accel_z
-
- enddo ! NGLLX
- enddo ! NGLLY
- enddo ! NGLLZ
-
- end subroutine compute_element_iso
-
-#else
-
-! FORCE_VECTORIZATION
-! vectorized routine
-
- subroutine compute_element_iso(ispec, &
- minus_gravity_table,density_table,minus_deriv_gravity_table, &
- xstore,ystore,zstore, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- wgll_cube, &
- kappavstore,muvstore, &
- ibool, &
- R_xx,R_yy,R_xy,R_xz,R_yz, &
- epsilon_trace_over_3, &
- one_minus_sum_beta,vnspec, &
- tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
- dummyx_loc,dummyy_loc,dummyz_loc, &
- epsilondev_loc,rho_s_H,is_backward_field)
-
- use constants_solver
- use specfem_par,only: COMPUTE_AND_STORE_STRAIN
-
- implicit none
-
-
- ! element id
- integer :: ispec
-
- ! arrays with mesh parameters per slice
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: ibool
-
- ! x y and z contain r theta and phi
- real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE) :: xstore,ystore,zstore
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
-
- ! array with derivatives of Lagrange polynomials and precalculated products
- double precision, dimension(NGLLX,NGLLY,NGLLZ) :: wgll_cube
-
- ! store anisotropic properties only where needed to save memory
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ISO_MANTLE) :: &
- kappavstore,muvstore
-
- ! variable sized array variables
- integer :: vnspec
-
- ! attenuation
- ! memory variables for attenuation
- ! memory variables R_ij are stored at the local rather than global level
- ! to allow for optimization of cache access by compiler
- real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUATION) :: R_xx,R_yy,R_xy,R_xz,R_yz
-
- real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: epsilon_trace_over_3
-
- real(kind=CUSTOM_REAL), dimension(ATT1_VAL,ATT2_VAL,ATT3_VAL,vnspec) :: one_minus_sum_beta
-
- ! gravity
- double precision, dimension(NRAD_GRAVITY) :: minus_gravity_table,density_table,minus_deriv_gravity_table
-
- ! element info
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
- tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc,dummyy_loc,dummyz_loc
-
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: rho_s_H
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
-
- logical :: is_backward_field
-
-! local parameters
- real(kind=CUSTOM_REAL) one_minus_sum_beta_use
- real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
- real(kind=CUSTOM_REAL) duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl
- real(kind=CUSTOM_REAL) duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl
- real(kind=CUSTOM_REAL) duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
- real(kind=CUSTOM_REAL) sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz
- real(kind=CUSTOM_REAL) sigma_yx,sigma_zx,sigma_zy
-
- real(kind=CUSTOM_REAL) templ
- real(kind=CUSTOM_REAL) lambdal,mul,lambdalplus2mul
- real(kind=CUSTOM_REAL) kappal
-
- ! for gravity
- double precision dphi,dtheta
- double precision radius,rho,minus_g,minus_dg
- double precision minus_g_over_radius,minus_dg_plus_g_over_radius
- double precision cos_theta,sin_theta,cos_phi,sin_phi
- double precision cos_theta_sq,sin_theta_sq,cos_phi_sq,sin_phi_sq
- double precision factor,sx_l,sy_l,sz_l,gxl,gyl,gzl
- double precision Hxxl,Hyyl,Hzzl,Hxyl,Hxzl,Hyzl
-
- integer :: int_radius
- integer :: iglob
-
- logical :: dummyl
-
- ! force vectorization
- integer :: ijk
- real(kind=CUSTOM_REAL) R_xx_val,R_yy_val
-
+#ifdef FORCE_VECTORIZATION
! in this vectorized version we have to assume that N_SLS == 3 in order to be able to unroll and thus suppress
! an inner loop that would otherwise prevent vectorization; this is safe in practice in all cases because N_SLS == 3
-! in all known applications, and in the main program we check that N_SLS == 3 if FORCE_VECTORIZATION is used and we stop otherwise
+! in all known applications, and in the main program we check that N_SLS == 3 if FORCE_VECTORIZATION is used and we stop otherwise
+ integer :: ijk
+ real(kind=CUSTOM_REAL) :: R_xx_val,R_yy_val
+#else
+ integer :: i,j,k
+#endif
-!daniel debug
- ! dummy to avoid compiler warning
- dummyl = is_backward_field
-
! isotropic element
- do ijk=1,NGLLCUBE
+ DO_LOOP_IJK
! get derivatives of ux, uy and uz with respect to x, y and z
- xixl = xix(ijk,1,1,ispec)
- xiyl = xiy(ijk,1,1,ispec)
- xizl = xiz(ijk,1,1,ispec)
- etaxl = etax(ijk,1,1,ispec)
- etayl = etay(ijk,1,1,ispec)
- etazl = etaz(ijk,1,1,ispec)
- gammaxl = gammax(ijk,1,1,ispec)
- gammayl = gammay(ijk,1,1,ispec)
- gammazl = gammaz(ijk,1,1,ispec)
+ xixl = xix(INDEX_IJK,ispec)
+ xiyl = xiy(INDEX_IJK,ispec)
+ xizl = xiz(INDEX_IJK,ispec)
+ etaxl = etax(INDEX_IJK,ispec)
+ etayl = etay(INDEX_IJK,ispec)
+ etazl = etaz(INDEX_IJK,ispec)
+ gammaxl = gammax(INDEX_IJK,ispec)
+ gammayl = gammay(INDEX_IJK,ispec)
+ gammazl = gammaz(INDEX_IJK,ispec)
! compute the jacobian
jacobianl = 1.0_CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
- xiyl*(etaxl*gammazl-etazl*gammaxl) &
+ xizl*(etaxl*gammayl-etayl*gammaxl))
- duxdxl = xixl*tempx1(ijk,1,1) + etaxl*tempx2(ijk,1,1) + gammaxl*tempx3(ijk,1,1)
- duxdyl = xiyl*tempx1(ijk,1,1) + etayl*tempx2(ijk,1,1) + gammayl*tempx3(ijk,1,1)
- duxdzl = xizl*tempx1(ijk,1,1) + etazl*tempx2(ijk,1,1) + gammazl*tempx3(ijk,1,1)
+ duxdxl = xixl*tempx1(INDEX_IJK) + etaxl*tempx2(INDEX_IJK) + gammaxl*tempx3(INDEX_IJK)
+ duxdyl = xiyl*tempx1(INDEX_IJK) + etayl*tempx2(INDEX_IJK) + gammayl*tempx3(INDEX_IJK)
+ duxdzl = xizl*tempx1(INDEX_IJK) + etazl*tempx2(INDEX_IJK) + gammazl*tempx3(INDEX_IJK)
- duydxl = xixl*tempy1(ijk,1,1) + etaxl*tempy2(ijk,1,1) + gammaxl*tempy3(ijk,1,1)
- duydyl = xiyl*tempy1(ijk,1,1) + etayl*tempy2(ijk,1,1) + gammayl*tempy3(ijk,1,1)
- duydzl = xizl*tempy1(ijk,1,1) + etazl*tempy2(ijk,1,1) + gammazl*tempy3(ijk,1,1)
+ duydxl = xixl*tempy1(INDEX_IJK) + etaxl*tempy2(INDEX_IJK) + gammaxl*tempy3(INDEX_IJK)
+ duydyl = xiyl*tempy1(INDEX_IJK) + etayl*tempy2(INDEX_IJK) + gammayl*tempy3(INDEX_IJK)
+ duydzl = xizl*tempy1(INDEX_IJK) + etazl*tempy2(INDEX_IJK) + gammazl*tempy3(INDEX_IJK)
- duzdxl = xixl*tempz1(ijk,1,1) + etaxl*tempz2(ijk,1,1) + gammaxl*tempz3(ijk,1,1)
- duzdyl = xiyl*tempz1(ijk,1,1) + etayl*tempz2(ijk,1,1) + gammayl*tempz3(ijk,1,1)
- duzdzl = xizl*tempz1(ijk,1,1) + etazl*tempz2(ijk,1,1) + gammazl*tempz3(ijk,1,1)
+ duzdxl = xixl*tempz1(INDEX_IJK) + etaxl*tempz2(INDEX_IJK) + gammaxl*tempz3(INDEX_IJK)
+ duzdyl = xiyl*tempz1(INDEX_IJK) + etayl*tempz2(INDEX_IJK) + gammayl*tempz3(INDEX_IJK)
+ duzdzl = xizl*tempz1(INDEX_IJK) + etazl*tempz2(INDEX_IJK) + gammazl*tempz3(INDEX_IJK)
! precompute some sums to save CPU time
duxdxl_plus_duydyl = duxdxl + duydyl
@@ -538,18 +183,18 @@
! compute deviatoric strain
if (COMPUTE_AND_STORE_STRAIN) then
templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
- if( NSPEC_CRUST_MANTLE_STRAIN_ONLY == 1 ) then
+ if(NSPEC_CRUST_MANTLE_STRAIN_ONLY == 1) then
if( ispec == 1) then
- epsilon_trace_over_3(ijk,1,1,1) = templ
+ epsilon_trace_over_3(INDEX_IJK,1) = templ
endif
else
- epsilon_trace_over_3(ijk,1,1,ispec) = templ
+ epsilon_trace_over_3(INDEX_IJK,ispec) = templ
endif
- epsilondev_loc(1,ijk,1,1) = duxdxl - templ
- epsilondev_loc(2,ijk,1,1) = duydyl - templ
- epsilondev_loc(3,ijk,1,1) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl
- epsilondev_loc(4,ijk,1,1) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl
- epsilondev_loc(5,ijk,1,1) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl
+ epsilondev_loc(1,INDEX_IJK) = duxdxl - templ
+ epsilondev_loc(2,INDEX_IJK) = duydyl - templ
+ epsilondev_loc(3,INDEX_IJK) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl
+ epsilondev_loc(4,INDEX_IJK) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl
+ epsilondev_loc(5,INDEX_IJK) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl
endif
!
@@ -557,14 +202,14 @@
!
! layer with no transverse isotropy, use kappav and muv
- kappal = kappavstore(ijk,1,1,ispec)
- mul = muvstore(ijk,1,1,ispec)
+ kappal = kappavstore(INDEX_IJK,ispec)
+ mul = muvstore(INDEX_IJK,ispec)
! use unrelaxed parameters if attenuation
- if(ATTENUATION_VAL) then
+ if(ATTENUATION_VAL ) then
! precompute terms for attenuation if needed
if( ATTENUATION_3D_VAL .or. ATTENUATION_1D_WITH_3D_STORAGE_VAL ) then
- one_minus_sum_beta_use = one_minus_sum_beta(ijk,1,1,ispec)
+ one_minus_sum_beta_use = one_minus_sum_beta(INDEX_IJK,ispec)
else
one_minus_sum_beta_use = one_minus_sum_beta(1,1,1,ispec)
endif
@@ -584,35 +229,61 @@
sigma_yz = mul*duzdyl_plus_duydzl
! subtract memory variables if attenuation
- if(ATTENUATION_VAL .and. .not. PARTIAL_PHYS_DISPERSION_ONLY_VAL) then
- ! do NOT put this is a subroutine, otherwise the call to the subroutine prevents compilers from vectorizing the outer loop
- ! here we assume that N_SLS == 3 in order to be able to unroll and suppress the loop in order to vectorize the outer loop
- R_xx_val = R_xx(1,ijk,1,1,ispec)
- R_yy_val = R_yy(1,ijk,1,1,ispec)
+ if(ATTENUATION_VAL .and. .not. PARTIAL_PHYS_DISPERSION_ONLY_VAL ) then
+
+#ifdef FORCE_VECTORIZATION
+          ! do NOT put this in a subroutine, otherwise the call to the subroutine prevents compilers
+          ! from vectorizing the outer loop
+
+ ! here we assume that N_SLS == 3 in order to be able to unroll and suppress the loop
+ ! in order to vectorize the outer loop
+ R_xx_val = R_xx(1,INDEX_IJK,ispec)
+ R_yy_val = R_yy(1,INDEX_IJK,ispec)
sigma_xx = sigma_xx - R_xx_val
sigma_yy = sigma_yy - R_yy_val
sigma_zz = sigma_zz + R_xx_val + R_yy_val
- sigma_xy = sigma_xy - R_xy(1,ijk,1,1,ispec)
- sigma_xz = sigma_xz - R_xz(1,ijk,1,1,ispec)
- sigma_yz = sigma_yz - R_yz(1,ijk,1,1,ispec)
+ sigma_xy = sigma_xy - R_xy(1,INDEX_IJK,ispec)
+ sigma_xz = sigma_xz - R_xz(1,INDEX_IJK,ispec)
+ sigma_yz = sigma_yz - R_yz(1,INDEX_IJK,ispec)
- R_xx_val = R_xx(2,ijk,1,1,ispec)
- R_yy_val = R_yy(2,ijk,1,1,ispec)
+ R_xx_val = R_xx(2,INDEX_IJK,ispec)
+ R_yy_val = R_yy(2,INDEX_IJK,ispec)
sigma_xx = sigma_xx - R_xx_val
sigma_yy = sigma_yy - R_yy_val
sigma_zz = sigma_zz + R_xx_val + R_yy_val
- sigma_xy = sigma_xy - R_xy(2,ijk,1,1,ispec)
- sigma_xz = sigma_xz - R_xz(2,ijk,1,1,ispec)
- sigma_yz = sigma_yz - R_yz(2,ijk,1,1,ispec)
+ sigma_xy = sigma_xy - R_xy(2,INDEX_IJK,ispec)
+ sigma_xz = sigma_xz - R_xz(2,INDEX_IJK,ispec)
+ sigma_yz = sigma_yz - R_yz(2,INDEX_IJK,ispec)
- R_xx_val = R_xx(3,ijk,1,1,ispec)
- R_yy_val = R_yy(3,ijk,1,1,ispec)
+ R_xx_val = R_xx(3,INDEX_IJK,ispec)
+ R_yy_val = R_yy(3,INDEX_IJK,ispec)
sigma_xx = sigma_xx - R_xx_val
sigma_yy = sigma_yy - R_yy_val
sigma_zz = sigma_zz + R_xx_val + R_yy_val
- sigma_xy = sigma_xy - R_xy(3,ijk,1,1,ispec)
- sigma_xz = sigma_xz - R_xz(3,ijk,1,1,ispec)
- sigma_yz = sigma_yz - R_yz(3,ijk,1,1,ispec)
+ sigma_xy = sigma_xy - R_xy(3,INDEX_IJK,ispec)
+ sigma_xz = sigma_xz - R_xz(3,INDEX_IJK,ispec)
+ sigma_yz = sigma_yz - R_yz(3,INDEX_IJK,ispec)
+#else
+!daniel debug: att - debug update
+! call compute_element_att_mem_up_cm(ispec,INDEX_IJK, &
+! R_xx(1,INDEX_IJK,ispec), &
+! R_yy(1,INDEX_IJK,ispec), &
+! R_xy(1,INDEX_IJK,ispec), &
+! R_xz(1,INDEX_IJK,ispec), &
+! R_yz(1,INDEX_IJK,ispec), &
+! epsilondev_loc(:,INDEX_IJK),muvstore(INDEX_IJK,ispec),is_backward_field)
+
+ ! note: function inlining is generally done by fortran compilers;
+ ! compilers decide based on performance heuristics
+ ! note: fortran passes pointers to array location, thus R_memory(1,1,...) should be fine
+ call compute_element_att_stress(R_xx(1,INDEX_IJK,ispec), &
+ R_yy(1,INDEX_IJK,ispec), &
+ R_xy(1,INDEX_IJK,ispec), &
+ R_xz(1,INDEX_IJK,ispec), &
+ R_yz(1,INDEX_IJK,ispec), &
+ sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz)
+#endif
+
endif ! ATTENUATION_VAL
! define symmetric components of sigma for gravity
@@ -624,7 +295,7 @@
if(GRAVITY_VAL) then
! use mesh coordinates to get theta and phi
! x y and z contain r theta and phi
- iglob = ibool(ijk,1,1,ispec)
+ iglob = ibool(INDEX_IJK,ispec)
dtheta = dble(ystore(iglob))
dphi = dble(zstore(iglob))
@@ -666,602 +337,89 @@
Hxzl = cos_phi*cos_theta*minus_dg_plus_g_over_radius*sin_theta
Hyzl = cos_theta*minus_dg_plus_g_over_radius*sin_phi*sin_theta
- ! get displacement and multiply by density to compute G tensor
- sx_l = rho * dummyx_loc(ijk,1,1)
- sy_l = rho * dummyy_loc(ijk,1,1)
- sz_l = rho * dummyz_loc(ijk,1,1)
+ ! distinguish between single and double precision for reals
+ if(CUSTOM_REAL == SIZE_REAL) then
- ! compute G tensor from s . g and add to sigma (not symmetric)
- sigma_xx = sigma_xx + sy_l*gyl + sz_l*gzl
- sigma_yy = sigma_yy + sx_l*gxl + sz_l*gzl
- sigma_zz = sigma_zz + sx_l*gxl + sy_l*gyl
+ ! get displacement and multiply by density to compute G tensor
+ sx_l = rho * dble(dummyx_loc(INDEX_IJK)) ! dble(displ_crust_mantle(1,iglob))
+ sy_l = rho * dble(dummyy_loc(INDEX_IJK)) ! dble(displ_crust_mantle(2,iglob))
+ sz_l = rho * dble(dummyz_loc(INDEX_IJK)) ! dble(displ_crust_mantle(3,iglob))
- sigma_xy = sigma_xy - sx_l * gyl
- sigma_yx = sigma_yx - sy_l * gxl
+ ! compute G tensor from s . g and add to sigma (not symmetric)
+ sigma_xx = sigma_xx + sngl(sy_l*gyl + sz_l*gzl)
+ sigma_yy = sigma_yy + sngl(sx_l*gxl + sz_l*gzl)
+ sigma_zz = sigma_zz + sngl(sx_l*gxl + sy_l*gyl)
- sigma_xz = sigma_xz - sx_l * gzl
- sigma_zx = sigma_zx - sz_l * gxl
+ sigma_xy = sigma_xy - sngl(sx_l * gyl)
+ sigma_yx = sigma_yx - sngl(sy_l * gxl)
- sigma_yz = sigma_yz - sy_l * gzl
- sigma_zy = sigma_zy - sz_l * gyl
+ sigma_xz = sigma_xz - sngl(sx_l * gzl)
+ sigma_zx = sigma_zx - sngl(sz_l * gxl)
- ! precompute vector
- factor = jacobianl * wgll_cube(ijk,1,1)
- rho_s_H(1,ijk,1,1) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
- rho_s_H(2,ijk,1,1) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
- rho_s_H(3,ijk,1,1) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
- endif ! end of section with gravity terms
+ sigma_yz = sigma_yz - sngl(sy_l * gzl)
+ sigma_zy = sigma_zy - sngl(sz_l * gyl)
- ! form dot product with test vector, non-symmetric form
- tempx1(ijk,1,1) = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl) ! this goes to accel_x
- tempy1(ijk,1,1) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl) ! this goes to accel_y
- tempz1(ijk,1,1) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl) ! this goes to accel_z
+ ! precompute vector
+ factor = dble(jacobianl) * wgll_cube(INDEX_IJK)
+ rho_s_H(1,INDEX_IJK) = sngl(factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl))
+ rho_s_H(2,INDEX_IJK) = sngl(factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl))
+ rho_s_H(3,INDEX_IJK) = sngl(factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl))
- tempx2(ijk,1,1) = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl) ! this goes to accel_x
- tempy2(ijk,1,1) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl) ! this goes to accel_y
- tempz2(ijk,1,1) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl) ! this goes to accel_z
+ else
- tempx3(ijk,1,1) = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl) ! this goes to accel_x
- tempy3(ijk,1,1) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl) ! this goes to accel_y
- tempz3(ijk,1,1) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl) ! this goes to accel_z
- enddo
+ ! get displacement and multiply by density to compute G tensor
+ sx_l = rho * dummyx_loc(INDEX_IJK) ! displ_crust_mantle(1,iglob)
+ sy_l = rho * dummyy_loc(INDEX_IJK) ! displ_crust_mantle(2,iglob)
+ sz_l = rho * dummyz_loc(INDEX_IJK) ! displ_crust_mantle(3,iglob)
- end subroutine compute_element_iso
+ ! compute G tensor from s . g and add to sigma (not symmetric)
+ sigma_xx = sigma_xx + sy_l*gyl + sz_l*gzl
+ sigma_yy = sigma_yy + sx_l*gxl + sz_l*gzl
+ sigma_zz = sigma_zz + sx_l*gxl + sy_l*gyl
-! end of FORCE_VECTORIZATION
-#endif
+ sigma_xy = sigma_xy - sx_l * gyl
+ sigma_yx = sigma_yx - sy_l * gxl
-!--------------------------------------------------------------------------------------------
-!
-! transversely isotropic element
-!
-!--------------------------------------------------------------------------------------------
+ sigma_xz = sigma_xz - sx_l * gzl
+ sigma_zx = sigma_zx - sz_l * gxl
-#ifndef FORCE_VECTORIZATION
+ sigma_yz = sigma_yz - sy_l * gzl
+ sigma_zy = sigma_zy - sz_l * gyl
- ! default, without vectorization
+ ! precompute vector
+ factor = jacobianl * wgll_cube(INDEX_IJK)
+ rho_s_H(1,INDEX_IJK) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
+ rho_s_H(2,INDEX_IJK) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
+ rho_s_H(3,INDEX_IJK) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
- subroutine compute_element_tiso(ispec, &
- minus_gravity_table,density_table,minus_deriv_gravity_table, &
- xstore,ystore,zstore, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- wgll_cube, &
- kappavstore,kappahstore,muvstore,muhstore,eta_anisostore, &
- ibool, &
- R_xx,R_yy,R_xy,R_xz,R_yz, &
- epsilon_trace_over_3, &
- one_minus_sum_beta,vnspec, &
- tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
- dummyx_loc,dummyy_loc,dummyz_loc, &
- epsilondev_loc,rho_s_H,is_backward_field)
-
-! this routine is optimized for NGLLX = NGLLY = NGLLZ = 5 using the Deville et al. (2002) inlined matrix-matrix products
-
- use constants_solver
- use specfem_par,only: COMPUTE_AND_STORE_STRAIN
-
- implicit none
-
- ! element id
- integer :: ispec
-
- ! arrays with mesh parameters per slice
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: ibool
-
- ! x y and z contain r theta and phi
- real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE) :: xstore,ystore,zstore
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
-
- ! array with derivatives of Lagrange polynomials and precalculated products
- double precision, dimension(NGLLX,NGLLY,NGLLZ) :: wgll_cube
-
- ! store anisotropic properties only where needed to save memory
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_TISO_MANTLE) :: &
- kappahstore,muhstore,eta_anisostore
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ISO_MANTLE) :: &
- kappavstore,muvstore
-
- ! attenuation
- ! memory variables for attenuation
- ! memory variables R_ij are stored at the local rather than global level
- ! to allow for optimization of cache access by compiler
- real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUATION) :: R_xx,R_yy,R_xy,R_xz,R_yz
-
- real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: epsilon_trace_over_3
-
- ! variable sized array variables
- integer :: vnspec
-
- ! [alpha,beta,gamma]val reduced to N_SLS to N_SLS*NUM_NODES
- real(kind=CUSTOM_REAL), dimension(ATT1_VAL,ATT2_VAL,ATT3_VAL,vnspec) :: one_minus_sum_beta
-
- ! gravity
- double precision, dimension(NRAD_GRAVITY) :: minus_gravity_table,density_table,minus_deriv_gravity_table
-
- ! element info
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
- tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc,dummyy_loc,dummyz_loc
-
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: rho_s_H
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
-
- logical :: is_backward_field
-
-! local parameters
- real(kind=CUSTOM_REAL) one_minus_sum_beta_use
- ! the 21 coefficients for an anisotropic medium in reduced notation
- real(kind=CUSTOM_REAL) c11,c22,c33,c44,c55,c66,c12,c13,c23,c14,c24,c34,c15,c25,c35,c45,c16,c26,c36,c46,c56
-
- real(kind=CUSTOM_REAL) rhovphsq,sinphifour,cosphisq,sinphisq,costhetasq,rhovsvsq,sinthetasq, &
- cosphifour,costhetafour,rhovpvsq,sinthetafour,rhovshsq,cosfourphi, &
- costwotheta,cosfourtheta,sintwophisq,costheta,sinphi,sintheta,cosphi, &
- sintwotheta,costwophi,sintwophi,costwothetasq,costwophisq,phi,theta
-
- real(kind=CUSTOM_REAL) two_rhovsvsq,two_rhovshsq ! two_rhovpvsq,two_rhovphsq
- real(kind=CUSTOM_REAL) four_rhovsvsq,four_rhovshsq ! four_rhovpvsq,four_rhovphsq
-
- real(kind=CUSTOM_REAL) twoetaminone,etaminone,eta_aniso
- real(kind=CUSTOM_REAL) two_eta_aniso,four_eta_aniso,six_eta_aniso
- real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
- real(kind=CUSTOM_REAL) duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl
- real(kind=CUSTOM_REAL) duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl
- real(kind=CUSTOM_REAL) duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
- real(kind=CUSTOM_REAL) sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz
-
- real(kind=CUSTOM_REAL) templ
- real(kind=CUSTOM_REAL) templ1,templ1_cos,templ2,templ2_cos,templ3,templ3_two,templ3_cos
- real(kind=CUSTOM_REAL) kappavl,kappahl,muvl,muhl
-
- ! for gravity
- double precision dphi,dtheta
- double precision radius,rho,minus_g,minus_dg
- double precision minus_g_over_radius,minus_dg_plus_g_over_radius
- double precision cos_theta,sin_theta,cos_phi,sin_phi
- double precision cos_theta_sq,sin_theta_sq,cos_phi_sq,sin_phi_sq
- double precision factor,sx_l,sy_l,sz_l,gxl,gyl,gzl
- double precision Hxxl,Hyyl,Hzzl,Hxyl,Hxzl,Hyzl
- real(kind=CUSTOM_REAL) sigma_yx,sigma_zx,sigma_zy
-
- integer :: i,j,k
- integer :: int_radius
- integer :: iglob
-
- logical :: dummyl
-
-!daniel debug
- ! dummy to avoid compiler warning
- dummyl = is_backward_field
-
- ! transverse isotropic element
-
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- ! get derivatives of ux, uy and uz with respect to x, y and z
- xixl = xix(i,j,k,ispec)
- xiyl = xiy(i,j,k,ispec)
- xizl = xiz(i,j,k,ispec)
- etaxl = etax(i,j,k,ispec)
- etayl = etay(i,j,k,ispec)
- etazl = etaz(i,j,k,ispec)
- gammaxl = gammax(i,j,k,ispec)
- gammayl = gammay(i,j,k,ispec)
- gammazl = gammaz(i,j,k,ispec)
-
- ! compute the jacobian
- jacobianl = 1.0_CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
- - xiyl*(etaxl*gammazl-etazl*gammaxl) &
- + xizl*(etaxl*gammayl-etayl*gammaxl))
-
- duxdxl = xixl*tempx1(i,j,k) + etaxl*tempx2(i,j,k) + gammaxl*tempx3(i,j,k)
- duxdyl = xiyl*tempx1(i,j,k) + etayl*tempx2(i,j,k) + gammayl*tempx3(i,j,k)
- duxdzl = xizl*tempx1(i,j,k) + etazl*tempx2(i,j,k) + gammazl*tempx3(i,j,k)
-
- duydxl = xixl*tempy1(i,j,k) + etaxl*tempy2(i,j,k) + gammaxl*tempy3(i,j,k)
- duydyl = xiyl*tempy1(i,j,k) + etayl*tempy2(i,j,k) + gammayl*tempy3(i,j,k)
- duydzl = xizl*tempy1(i,j,k) + etazl*tempy2(i,j,k) + gammazl*tempy3(i,j,k)
-
- duzdxl = xixl*tempz1(i,j,k) + etaxl*tempz2(i,j,k) + gammaxl*tempz3(i,j,k)
- duzdyl = xiyl*tempz1(i,j,k) + etayl*tempz2(i,j,k) + gammayl*tempz3(i,j,k)
- duzdzl = xizl*tempz1(i,j,k) + etazl*tempz2(i,j,k) + gammazl*tempz3(i,j,k)
-
- ! precompute some sums to save CPU time
- duxdxl_plus_duydyl = duxdxl + duydyl
- duxdxl_plus_duzdzl = duxdxl + duzdzl
- duydyl_plus_duzdzl = duydyl + duzdzl
- duxdyl_plus_duydxl = duxdyl + duydxl
- duzdxl_plus_duxdzl = duzdxl + duxdzl
- duzdyl_plus_duydzl = duzdyl + duydzl
-
-!ZN beware, here the expression differs from the strain used in memory variable equation (6) in D. Komatitsch and J. Tromp 1999,
-!ZN here Brian Savage uses the engineering strain which are epsilon = 1/2*(grad U + (grad U)^T),
-!ZN where U is the displacement vector and grad the gradient operator, i.e. there is a 1/2 factor difference between the two.
-!ZN Both expressions are fine, but we need to keep in mind that if we put the 1/2 factor here there we need to remove it
-!ZN from the expression in which we use the strain later in the code.
-
- ! compute deviatoric strain
- if (COMPUTE_AND_STORE_STRAIN) then
- templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
- if(NSPEC_CRUST_MANTLE_STRAIN_ONLY == 1) then
- if( ispec == 1) then
- epsilon_trace_over_3(i,j,k,1) = templ
- endif
- else
- epsilon_trace_over_3(i,j,k,ispec) = templ
- endif
- epsilondev_loc(1,i,j,k) = duxdxl - templ
- epsilondev_loc(2,i,j,k) = duydyl - templ
- epsilondev_loc(3,i,j,k) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl
- epsilondev_loc(4,i,j,k) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl
- epsilondev_loc(5,i,j,k) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl
endif
- !
- ! compute either isotropic or anisotropic elements
- !
+ endif ! end of section with gravity terms
-! note: the mesh is built such that anisotropic elements are created first in anisotropic layers,
-! thus they are listed first ( see in create_regions_mesh.f90: perm_layer() ordering )
-! this is therefore still in bounds of 1:NSPECMAX_TISO_MANTLE even if NSPECMAX_TISO is less than NSPEC
+ ! form dot product with test vector, non-symmetric form
+ tempx1(INDEX_IJK) = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl) ! this goes to accel_x
+ tempy1(INDEX_IJK) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl) ! this goes to accel_y
+ tempz1(INDEX_IJK) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl) ! this goes to accel_z
- ! use kappa and mu from transversely isotropic model
- kappavl = kappavstore(i,j,k,ispec)
- muvl = muvstore(i,j,k,ispec)
+ tempx2(INDEX_IJK) = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl) ! this goes to accel_x
+ tempy2(INDEX_IJK) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl) ! this goes to accel_y
+ tempz2(INDEX_IJK) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl) ! this goes to accel_z
- kappahl = kappahstore(i,j,k,ispec)
- muhl = muhstore(i,j,k,ispec)
+ tempx3(INDEX_IJK) = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl) ! this goes to accel_x
+ tempy3(INDEX_IJK) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl) ! this goes to accel_y
+ tempz3(INDEX_IJK) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl) ! this goes to accel_z
- ! use unrelaxed parameters if attenuation
- ! eta does not need to be shifted since it is a ratio
- if( ATTENUATION_VAL ) then
- ! precompute terms for attenuation if needed
- if( ATTENUATION_3D_VAL .or. ATTENUATION_1D_WITH_3D_STORAGE_VAL ) then
- one_minus_sum_beta_use = one_minus_sum_beta(i,j,k,ispec)
- else
- one_minus_sum_beta_use = one_minus_sum_beta(1,1,1,ispec)
- endif
- muvl = muvl * one_minus_sum_beta_use
- muhl = muhl * one_minus_sum_beta_use
- endif
+ ENDDO_LOOP_IJK
- rhovpvsq = kappavl + FOUR_THIRDS * muvl !!! that is C
- rhovphsq = kappahl + FOUR_THIRDS * muhl !!! that is A
+ end subroutine compute_element_iso
- rhovsvsq = muvl !!! that is L
- rhovshsq = muhl !!! that is N
- eta_aniso = eta_anisostore(i,j,k,ispec) !!! that is F / (A - 2 L)
+!--------------------------------------------------------------------------------------------
+!
+! transversely isotropic element
+!
+!--------------------------------------------------------------------------------------------
- ! use mesh coordinates to get theta and phi
- ! ystore and zstore contain theta and phi
- iglob = ibool(i,j,k,ispec)
-
- theta = ystore(iglob)
- phi = zstore(iglob)
-
- ! precompute some products to reduce the CPU time
-
- costheta = cos(theta)
- sintheta = sin(theta)
- cosphi = cos(phi)
- sinphi = sin(phi)
-
- costhetasq = costheta * costheta
- sinthetasq = sintheta * sintheta
- cosphisq = cosphi * cosphi
- sinphisq = sinphi * sinphi
-
- costhetafour = costhetasq * costhetasq
- sinthetafour = sinthetasq * sinthetasq
- cosphifour = cosphisq * cosphisq
- sinphifour = sinphisq * sinphisq
-
- costwotheta = cos(2.0_CUSTOM_REAL*theta)
- sintwotheta = sin(2.0_CUSTOM_REAL*theta)
- costwophi = cos(2.0_CUSTOM_REAL*phi)
- sintwophi = sin(2.0_CUSTOM_REAL*phi)
-
- cosfourtheta = cos(4.0_CUSTOM_REAL*theta)
- cosfourphi = cos(4.0_CUSTOM_REAL*phi)
-
- costwothetasq = costwotheta * costwotheta
-
- costwophisq = costwophi * costwophi
- sintwophisq = sintwophi * sintwophi
-
- etaminone = eta_aniso - 1.0_CUSTOM_REAL
- twoetaminone = 2.0_CUSTOM_REAL * eta_aniso - 1.0_CUSTOM_REAL
-
- ! precompute some products to reduce the CPU time
- two_eta_aniso = 2.0_CUSTOM_REAL*eta_aniso
- four_eta_aniso = 4.0_CUSTOM_REAL*eta_aniso
- six_eta_aniso = 6.0_CUSTOM_REAL*eta_aniso
-
- two_rhovsvsq = 2.0_CUSTOM_REAL*rhovsvsq
- two_rhovshsq = 2.0_CUSTOM_REAL*rhovshsq
- four_rhovsvsq = 4.0_CUSTOM_REAL*rhovsvsq
- four_rhovshsq = 4.0_CUSTOM_REAL*rhovshsq
-
- ! pre-compute temporary values
- templ1 = four_rhovsvsq - rhovpvsq + twoetaminone*rhovphsq - four_eta_aniso*rhovsvsq
- templ1_cos = rhovphsq - rhovpvsq + costwotheta*templ1
- templ2 = four_rhovsvsq - rhovpvsq - rhovphsq + two_eta_aniso*rhovphsq - four_eta_aniso*rhovsvsq
- templ2_cos = rhovpvsq - rhovphsq + costwotheta*templ2
- templ3 = rhovphsq + rhovpvsq - two_eta_aniso*rhovphsq + four_eta_aniso*rhovsvsq
- templ3_two = templ3 - two_rhovshsq - two_rhovsvsq
- templ3_cos = templ3_two + costwotheta*templ2
-
- ! reordering operations to facilitate compilation, avoiding divisions, using locality for temporary values
- c11 = rhovphsq*sinphifour &
- + 2.0_CUSTOM_REAL*cosphisq*sinphisq* &
- ( rhovphsq*costhetasq + sinthetasq*(eta_aniso*rhovphsq + two_rhovsvsq - two_eta_aniso*rhovsvsq) ) &
- + cosphifour*(rhovphsq*costhetafour &
- + 2.0_CUSTOM_REAL*costhetasq*sinthetasq*(eta_aniso*rhovphsq + two_rhovsvsq - two_eta_aniso*rhovsvsq) &
- + rhovpvsq*sinthetafour)
-
- c12 = 0.25_CUSTOM_REAL*costhetasq*(rhovphsq - two_rhovshsq)*(3.0_CUSTOM_REAL + cosfourphi) &
- - four_rhovshsq*cosphisq*costhetasq*sinphisq &
- + 0.03125_CUSTOM_REAL*rhovphsq*sintwophisq*(11.0_CUSTOM_REAL + cosfourtheta + 4.0*costwotheta) &
- + eta_aniso*sinthetasq*(rhovphsq - two_rhovsvsq) &
- *(cosphifour + sinphifour + 2.0_CUSTOM_REAL*cosphisq*costhetasq*sinphisq) &
- + rhovpvsq*cosphisq*sinphisq*sinthetafour &
- - rhovsvsq*sintwophisq*sinthetafour
-
- c13 = 0.125_CUSTOM_REAL*cosphisq*(rhovphsq + six_eta_aniso*rhovphsq + rhovpvsq - four_rhovsvsq &
- - 12.0_CUSTOM_REAL*eta_aniso*rhovsvsq + cosfourtheta*templ1) &
- + sinphisq*(eta_aniso*costhetasq*(rhovphsq - two_rhovsvsq) + sinthetasq*(rhovphsq - two_rhovshsq))
-
- ! uses temporary templ1 from c13
- c15 = cosphi*costheta*sintheta* &
- ( 0.5_CUSTOM_REAL*cosphisq* (rhovpvsq - rhovphsq + costwotheta*templ1) &
- + etaminone*sinphisq*(rhovphsq - two_rhovsvsq))
-
- c14 = costheta*sinphi*sintheta* &
- ( 0.5_CUSTOM_REAL*cosphisq*(templ2_cos + four_rhovshsq - four_rhovsvsq) &
- + sinphisq*(etaminone*rhovphsq + 2.0_CUSTOM_REAL*(rhovshsq - eta_aniso*rhovsvsq)) )
-
- ! uses temporary templ2_cos from c14
- c16 = 0.5_CUSTOM_REAL*cosphi*sinphi*sinthetasq* &
- ( cosphisq*templ2_cos &
- + 2.0_CUSTOM_REAL*etaminone*sinphisq*(rhovphsq - two_rhovsvsq) )
-
- c22 = rhovphsq*cosphifour + 2.0_CUSTOM_REAL*cosphisq*sinphisq* &
- (rhovphsq*costhetasq + sinthetasq*(eta_aniso*rhovphsq + two_rhovsvsq - two_eta_aniso*rhovsvsq)) &
- + sinphifour* &
- (rhovphsq*costhetafour + 2.0_CUSTOM_REAL*costhetasq*sinthetasq*(eta_aniso*rhovphsq &
- + two_rhovsvsq - two_eta_aniso*rhovsvsq) + rhovpvsq*sinthetafour)
-
- ! uses temporary templ1 from c13
- c23 = 0.125_CUSTOM_REAL*sinphisq*(rhovphsq + six_eta_aniso*rhovphsq &
- + rhovpvsq - four_rhovsvsq - 12.0_CUSTOM_REAL*eta_aniso*rhovsvsq + cosfourtheta*templ1) &
- + cosphisq*(eta_aniso*costhetasq*(rhovphsq - two_rhovsvsq) + sinthetasq*(rhovphsq - two_rhovshsq))
-
- ! uses temporary templ1 from c13
- c24 = costheta*sinphi*sintheta* &
- ( etaminone*cosphisq*(rhovphsq - two_rhovsvsq) &
- + 0.5_CUSTOM_REAL*sinphisq*(rhovpvsq - rhovphsq + costwotheta*templ1) )
-
- ! uses temporary templ2_cos from c14
- c25 = cosphi*costheta*sintheta* &
- ( cosphisq*(etaminone*rhovphsq + 2.0_CUSTOM_REAL*(rhovshsq - eta_aniso*rhovsvsq)) &
- + 0.5_CUSTOM_REAL*sinphisq*(templ2_cos + four_rhovshsq - four_rhovsvsq) )
-
- ! uses temporary templ2_cos from c14
- c26 = 0.5_CUSTOM_REAL*cosphi*sinphi*sinthetasq* &
- ( 2.0_CUSTOM_REAL*etaminone*cosphisq*(rhovphsq - two_rhovsvsq) &
- + sinphisq*templ2_cos )
-
- c33 = rhovpvsq*costhetafour &
- + 2.0_CUSTOM_REAL*costhetasq*sinthetasq*(two_rhovsvsq + eta_aniso*(rhovphsq - two_rhovsvsq)) &
- + rhovphsq*sinthetafour
-
- ! uses temporary templ1_cos from c13
- c34 = - 0.25_CUSTOM_REAL*sinphi*sintwotheta*templ1_cos
-
- ! uses temporary templ1_cos from c34
- c35 = - 0.25_CUSTOM_REAL*cosphi*sintwotheta*templ1_cos
-
- ! uses temporary templ1_cos from c34
- c36 = - 0.25_CUSTOM_REAL*sintwophi*sinthetasq*(templ1_cos - four_rhovshsq + four_rhovsvsq)
-
- c44 = cosphisq*(rhovsvsq*costhetasq + rhovshsq*sinthetasq) &
- + sinphisq*(rhovsvsq*costwothetasq + costhetasq*sinthetasq*templ3)
-
- ! uses temporary templ3 from c44
- c46 = - cosphi*costheta*sintheta* &
- ( cosphisq*(rhovshsq - rhovsvsq) - 0.5_CUSTOM_REAL*sinphisq*templ3_cos )
-
- ! uses templ3 from c46
- c45 = 0.25_CUSTOM_REAL*sintwophi*sinthetasq* &
- (templ3_two + costwotheta*(rhovphsq + rhovpvsq - two_eta_aniso*rhovphsq + 4.0_CUSTOM_REAL*etaminone*rhovsvsq))
-
- c55 = sinphisq*(rhovsvsq*costhetasq + rhovshsq*sinthetasq) &
- + cosphisq*(rhovsvsq*costwothetasq &
- + costhetasq*sinthetasq*(rhovphsq - two_eta_aniso*rhovphsq + rhovpvsq + four_eta_aniso*rhovsvsq) )
-
- ! uses temporary templ3_cos from c46
- c56 = costheta*sinphi*sintheta* &
- ( 0.5_CUSTOM_REAL*cosphisq*templ3_cos + sinphisq*(rhovsvsq - rhovshsq) )
-
- c66 = rhovshsq*costwophisq*costhetasq &
- - 2.0_CUSTOM_REAL*cosphisq*costhetasq*sinphisq*(rhovphsq - two_rhovshsq) &
- + 0.03125_CUSTOM_REAL*rhovphsq*sintwophisq*(11.0_CUSTOM_REAL + 4.0_CUSTOM_REAL*costwotheta + cosfourtheta) &
- - 0.125_CUSTOM_REAL*rhovsvsq*sinthetasq* &
- ( -6.0_CUSTOM_REAL - 2.0_CUSTOM_REAL*costwotheta - 2.0_CUSTOM_REAL*cosfourphi &
- + cos(4.0_CUSTOM_REAL*phi - 2.0_CUSTOM_REAL*theta) &
- + cos(2.0_CUSTOM_REAL*(2.0_CUSTOM_REAL*phi + theta)) ) &
- + rhovpvsq*cosphisq*sinphisq*sinthetafour &
- - 0.5_CUSTOM_REAL*eta_aniso*sintwophisq*sinthetafour*(rhovphsq - two_rhovsvsq)
-
- ! general expression of stress tensor for full Cijkl with 21 coefficients
- sigma_xx = c11*duxdxl + c16*duxdyl_plus_duydxl + c12*duydyl + &
- c15*duzdxl_plus_duxdzl + c14*duzdyl_plus_duydzl + c13*duzdzl
-
- sigma_yy = c12*duxdxl + c26*duxdyl_plus_duydxl + c22*duydyl + &
- c25*duzdxl_plus_duxdzl + c24*duzdyl_plus_duydzl + c23*duzdzl
-
- sigma_zz = c13*duxdxl + c36*duxdyl_plus_duydxl + c23*duydyl + &
- c35*duzdxl_plus_duxdzl + c34*duzdyl_plus_duydzl + c33*duzdzl
-
- sigma_xy = c16*duxdxl + c66*duxdyl_plus_duydxl + c26*duydyl + &
- c56*duzdxl_plus_duxdzl + c46*duzdyl_plus_duydzl + c36*duzdzl
-
- sigma_xz = c15*duxdxl + c56*duxdyl_plus_duydxl + c25*duydyl + &
- c55*duzdxl_plus_duxdzl + c45*duzdyl_plus_duydzl + c35*duzdzl
-
- sigma_yz = c14*duxdxl + c46*duxdyl_plus_duydxl + c24*duydyl + &
- c45*duzdxl_plus_duxdzl + c44*duzdyl_plus_duydzl + c34*duzdzl
-
- ! subtract memory variables if attenuation
- if(ATTENUATION_VAL .and. .not. PARTIAL_PHYS_DISPERSION_ONLY_VAL ) then
-
- ! note: function inlining is generally done by fortran compilers;
- ! compilers decide based on performance heuristics
- ! note: fortran passes pointers to array location, thus R_memory(1,1,...) is fine
- call compute_element_att_stress(R_xx(1,i,j,k,ispec), &
- R_yy(1,i,j,k,ispec), &
- R_xy(1,i,j,k,ispec), &
- R_xz(1,i,j,k,ispec), &
- R_yz(1,i,j,k,ispec), &
- sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz)
-
- endif ! ATTENUATION_VAL
-
- ! define symmetric components of sigma for gravity
- sigma_yx = sigma_xy
- sigma_zx = sigma_xz
- sigma_zy = sigma_yz
-
- ! compute non-symmetric terms for gravity
- if(GRAVITY_VAL) then
- ! use mesh coordinates to get theta and phi
- ! x y and z contain r theta and phi
- iglob = ibool(i,j,k,ispec)
-
- dtheta = dble(ystore(iglob))
- dphi = dble(zstore(iglob))
- radius = dble(xstore(iglob))
-
- cos_theta = dcos(dtheta)
- sin_theta = dsin(dtheta)
- cos_phi = dcos(dphi)
- sin_phi = dsin(dphi)
-
- cos_theta_sq = cos_theta*cos_theta
- sin_theta_sq = sin_theta*sin_theta
- cos_phi_sq = cos_phi*cos_phi
- sin_phi_sq = sin_phi*sin_phi
-
- ! get g, rho and dg/dr=dg
- ! spherical components of the gravitational acceleration
- ! for efficiency replace with lookup table every 100 m in radial direction
- int_radius = nint(10.d0 * radius * R_EARTH_KM )
- minus_g = minus_gravity_table(int_radius)
- minus_dg = minus_deriv_gravity_table(int_radius)
- rho = density_table(int_radius)
-
- ! Cartesian components of the gravitational acceleration
- gxl = minus_g*sin_theta*cos_phi
- gyl = minus_g*sin_theta*sin_phi
- gzl = minus_g*cos_theta
-
- ! Cartesian components of gradient of gravitational acceleration
- ! obtained from spherical components
- minus_g_over_radius = minus_g / radius
- minus_dg_plus_g_over_radius = minus_dg - minus_g_over_radius
-
- Hxxl = minus_g_over_radius*(cos_phi_sq*cos_theta_sq + sin_phi_sq) + cos_phi_sq*minus_dg*sin_theta_sq
- Hyyl = minus_g_over_radius*(cos_phi_sq + cos_theta_sq*sin_phi_sq) + minus_dg*sin_phi_sq*sin_theta_sq
- Hzzl = cos_theta_sq*minus_dg + minus_g_over_radius*sin_theta_sq
- Hxyl = cos_phi*minus_dg_plus_g_over_radius*sin_phi*sin_theta_sq
- Hxzl = cos_phi*cos_theta*minus_dg_plus_g_over_radius*sin_theta
- Hyzl = cos_theta*minus_dg_plus_g_over_radius*sin_phi*sin_theta
-
- ! distinguish between single and double precision for reals
- if(CUSTOM_REAL == SIZE_REAL) then
-
- ! get displacement and multiply by density to compute G tensor
- sx_l = rho * dble(dummyx_loc(i,j,k))
- sy_l = rho * dble(dummyy_loc(i,j,k))
- sz_l = rho * dble(dummyz_loc(i,j,k))
-
- ! compute G tensor from s . g and add to sigma (not symmetric)
- sigma_xx = sigma_xx + sngl(sy_l*gyl + sz_l*gzl)
- sigma_yy = sigma_yy + sngl(sx_l*gxl + sz_l*gzl)
- sigma_zz = sigma_zz + sngl(sx_l*gxl + sy_l*gyl)
-
- sigma_xy = sigma_xy - sngl(sx_l * gyl)
- sigma_yx = sigma_yx - sngl(sy_l * gxl)
-
- sigma_xz = sigma_xz - sngl(sx_l * gzl)
- sigma_zx = sigma_zx - sngl(sz_l * gxl)
-
- sigma_yz = sigma_yz - sngl(sy_l * gzl)
- sigma_zy = sigma_zy - sngl(sz_l * gyl)
-
- ! precompute vector
- factor = dble(jacobianl) * wgll_cube(i,j,k)
- rho_s_H(1,i,j,k) = sngl(factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl))
- rho_s_H(2,i,j,k) = sngl(factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl))
- rho_s_H(3,i,j,k) = sngl(factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl))
-
- else
-
- ! get displacement and multiply by density to compute G tensor
- sx_l = rho * dummyx_loc(i,j,k)
- sy_l = rho * dummyy_loc(i,j,k)
- sz_l = rho * dummyz_loc(i,j,k)
-
- ! compute G tensor from s . g and add to sigma (not symmetric)
- sigma_xx = sigma_xx + sy_l*gyl + sz_l*gzl
- sigma_yy = sigma_yy + sx_l*gxl + sz_l*gzl
- sigma_zz = sigma_zz + sx_l*gxl + sy_l*gyl
-
- sigma_xy = sigma_xy - sx_l * gyl
- sigma_yx = sigma_yx - sy_l * gxl
-
- sigma_xz = sigma_xz - sx_l * gzl
- sigma_zx = sigma_zx - sz_l * gxl
-
- sigma_yz = sigma_yz - sy_l * gzl
- sigma_zy = sigma_zy - sz_l * gyl
-
- ! precompute vector
- factor = jacobianl * wgll_cube(i,j,k)
- rho_s_H(1,i,j,k) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
- rho_s_H(2,i,j,k) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
- rho_s_H(3,i,j,k) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
-
- endif
-
- endif ! end of section with gravity terms
-
- ! form dot product with test vector, non-symmetric form
- tempx1(i,j,k) = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl) ! this goes to accel_x
- tempy1(i,j,k) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl) ! this goes to accel_y
- tempz1(i,j,k) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl) ! this goes to accel_z
-
- tempx2(i,j,k) = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl) ! this goes to accel_x
- tempy2(i,j,k) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl) ! this goes to accel_y
- tempz2(i,j,k) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl) ! this goes to accel_z
-
- tempx3(i,j,k) = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl) ! this goes to accel_x
- tempy3(i,j,k) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl) ! this goes to accel_y
- tempz3(i,j,k) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl) ! this goes to accel_z
-
- enddo ! NGLLX
- enddo ! NGLLY
- enddo ! NGLLZ
-
- end subroutine compute_element_tiso
-
-#else
-
- ! FORCE_VECTORIZATION
-
subroutine compute_element_tiso(ispec, &
minus_gravity_table,density_table,minus_deriv_gravity_table, &
xstore,ystore,zstore, &
@@ -1274,7 +432,7 @@
one_minus_sum_beta,vnspec, &
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
dummyx_loc,dummyy_loc,dummyz_loc, &
- epsilondev_loc,rho_s_H,is_backward_field)
+ epsilondev_loc,rho_s_H)
! this routine is optimized for NGLLX = NGLLY = NGLLZ = 5 using the Deville et al. (2002) inlined matrix-matrix products
@@ -1329,8 +487,6 @@
real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: rho_s_H
real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
- logical :: is_backward_field
-
! local parameters
real(kind=CUSTOM_REAL) one_minus_sum_beta_use
! the 21 coefficients for an anisotropic medium in reduced notation
@@ -1369,48 +525,47 @@
integer :: int_radius
integer :: iglob
- logical :: dummyl
-
- ! force vectorization
- integer :: ijk
- real(kind=CUSTOM_REAL) R_xx_val,R_yy_val
-
+#ifdef FORCE_VECTORIZATION
! in this vectorized version we have to assume that N_SLS == 3 in order to be able to unroll and thus suppress
! an inner loop that would otherwise prevent vectorization; this is safe in practice in all cases because N_SLS == 3
! in all known applications, and in the main program we check that N_SLS == 3 if FORCE_VECTORIZATION is used and we stop
+ integer :: ijk
+ real(kind=CUSTOM_REAL) :: R_xx_val,R_yy_val
+#else
+ integer :: i,j,k
+#endif
-!daniel debug
- ! dummy to avoid compiler warning
- dummyl = is_backward_field
+ ! transverse isotropic element
- do ijk=1,NGLLCUBE
+ DO_LOOP_IJK
+
! get derivatives of ux, uy and uz with respect to x, y and z
- xixl = xix(ijk,1,1,ispec)
- xiyl = xiy(ijk,1,1,ispec)
- xizl = xiz(ijk,1,1,ispec)
- etaxl = etax(ijk,1,1,ispec)
- etayl = etay(ijk,1,1,ispec)
- etazl = etaz(ijk,1,1,ispec)
- gammaxl = gammax(ijk,1,1,ispec)
- gammayl = gammay(ijk,1,1,ispec)
- gammazl = gammaz(ijk,1,1,ispec)
+ xixl = xix(INDEX_IJK,ispec)
+ xiyl = xiy(INDEX_IJK,ispec)
+ xizl = xiz(INDEX_IJK,ispec)
+ etaxl = etax(INDEX_IJK,ispec)
+ etayl = etay(INDEX_IJK,ispec)
+ etazl = etaz(INDEX_IJK,ispec)
+ gammaxl = gammax(INDEX_IJK,ispec)
+ gammayl = gammay(INDEX_IJK,ispec)
+ gammazl = gammaz(INDEX_IJK,ispec)
! compute the jacobian
jacobianl = 1.0_CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
- xiyl*(etaxl*gammazl-etazl*gammaxl) &
+ xizl*(etaxl*gammayl-etayl*gammaxl))
- duxdxl = xixl*tempx1(ijk,1,1) + etaxl*tempx2(ijk,1,1) + gammaxl*tempx3(ijk,1,1)
- duxdyl = xiyl*tempx1(ijk,1,1) + etayl*tempx2(ijk,1,1) + gammayl*tempx3(ijk,1,1)
- duxdzl = xizl*tempx1(ijk,1,1) + etazl*tempx2(ijk,1,1) + gammazl*tempx3(ijk,1,1)
+ duxdxl = xixl*tempx1(INDEX_IJK) + etaxl*tempx2(INDEX_IJK) + gammaxl*tempx3(INDEX_IJK)
+ duxdyl = xiyl*tempx1(INDEX_IJK) + etayl*tempx2(INDEX_IJK) + gammayl*tempx3(INDEX_IJK)
+ duxdzl = xizl*tempx1(INDEX_IJK) + etazl*tempx2(INDEX_IJK) + gammazl*tempx3(INDEX_IJK)
- duydxl = xixl*tempy1(ijk,1,1) + etaxl*tempy2(ijk,1,1) + gammaxl*tempy3(ijk,1,1)
- duydyl = xiyl*tempy1(ijk,1,1) + etayl*tempy2(ijk,1,1) + gammayl*tempy3(ijk,1,1)
- duydzl = xizl*tempy1(ijk,1,1) + etazl*tempy2(ijk,1,1) + gammazl*tempy3(ijk,1,1)
+ duydxl = xixl*tempy1(INDEX_IJK) + etaxl*tempy2(INDEX_IJK) + gammaxl*tempy3(INDEX_IJK)
+ duydyl = xiyl*tempy1(INDEX_IJK) + etayl*tempy2(INDEX_IJK) + gammayl*tempy3(INDEX_IJK)
+ duydzl = xizl*tempy1(INDEX_IJK) + etazl*tempy2(INDEX_IJK) + gammazl*tempy3(INDEX_IJK)
- duzdxl = xixl*tempz1(ijk,1,1) + etaxl*tempz2(ijk,1,1) + gammaxl*tempz3(ijk,1,1)
- duzdyl = xiyl*tempz1(ijk,1,1) + etayl*tempz2(ijk,1,1) + gammayl*tempz3(ijk,1,1)
- duzdzl = xizl*tempz1(ijk,1,1) + etazl*tempz2(ijk,1,1) + gammazl*tempz3(ijk,1,1)
+ duzdxl = xixl*tempz1(INDEX_IJK) + etaxl*tempz2(INDEX_IJK) + gammaxl*tempz3(INDEX_IJK)
+ duzdyl = xiyl*tempz1(INDEX_IJK) + etayl*tempz2(INDEX_IJK) + gammayl*tempz3(INDEX_IJK)
+ duzdzl = xizl*tempz1(INDEX_IJK) + etazl*tempz2(INDEX_IJK) + gammazl*tempz3(INDEX_IJK)
! precompute some sums to save CPU time
duxdxl_plus_duydyl = duxdxl + duydyl
@@ -1431,16 +586,16 @@
templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
if(NSPEC_CRUST_MANTLE_STRAIN_ONLY == 1) then
if( ispec == 1) then
- epsilon_trace_over_3(ijk,1,1,1) = templ
+ epsilon_trace_over_3(INDEX_IJK,1) = templ
endif
else
- epsilon_trace_over_3(ijk,1,1,ispec) = templ
+ epsilon_trace_over_3(INDEX_IJK,ispec) = templ
endif
- epsilondev_loc(1,ijk,1,1) = duxdxl - templ
- epsilondev_loc(2,ijk,1,1) = duydyl - templ
- epsilondev_loc(3,ijk,1,1) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl
- epsilondev_loc(4,ijk,1,1) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl
- epsilondev_loc(5,ijk,1,1) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl
+ epsilondev_loc(1,INDEX_IJK) = duxdxl - templ
+ epsilondev_loc(2,INDEX_IJK) = duydyl - templ
+ epsilondev_loc(3,INDEX_IJK) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl
+ epsilondev_loc(4,INDEX_IJK) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl
+ epsilondev_loc(5,INDEX_IJK) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl
endif
!
@@ -1452,18 +607,18 @@
! this is therefore still in bounds of 1:NSPECMAX_TISO_MANTLE even if NSPECMAX_TISO is less than NSPEC
! use kappa and mu from transversely isotropic model
- kappavl = kappavstore(ijk,1,1,ispec)
- muvl = muvstore(ijk,1,1,ispec)
+ kappavl = kappavstore(INDEX_IJK,ispec)
+ muvl = muvstore(INDEX_IJK,ispec)
- kappahl = kappahstore(ijk,1,1,ispec)
- muhl = muhstore(ijk,1,1,ispec)
+ kappahl = kappahstore(INDEX_IJK,ispec)
+ muhl = muhstore(INDEX_IJK,ispec)
! use unrelaxed parameters if attenuation
! eta does not need to be shifted since it is a ratio
- if(ATTENUATION_VAL) then
+ if( ATTENUATION_VAL ) then
! precompute terms for attenuation if needed
if( ATTENUATION_3D_VAL .or. ATTENUATION_1D_WITH_3D_STORAGE_VAL ) then
- one_minus_sum_beta_use = one_minus_sum_beta(ijk,1,1,ispec)
+ one_minus_sum_beta_use = one_minus_sum_beta(INDEX_IJK,ispec)
else
one_minus_sum_beta_use = one_minus_sum_beta(1,1,1,ispec)
endif
@@ -1477,11 +632,11 @@
rhovsvsq = muvl !!! that is L
rhovshsq = muhl !!! that is N
- eta_aniso = eta_anisostore(ijk,1,1,ispec) !!! that is F / (A - 2 L)
+ eta_aniso = eta_anisostore(INDEX_IJK,ispec) !!! that is F / (A - 2 L)
! use mesh coordinates to get theta and phi
! ystore and zstore contain theta and phi
- iglob = ibool(ijk,1,1,ispec)
+ iglob = ibool(INDEX_IJK,ispec)
theta = ystore(iglob)
phi = zstore(iglob)
@@ -1660,35 +815,51 @@
c45*duzdxl_plus_duxdzl + c44*duzdyl_plus_duydzl + c34*duzdzl
! subtract memory variables if attenuation
- if(ATTENUATION_VAL .and. .not. PARTIAL_PHYS_DISPERSION_ONLY_VAL) then
-! do NOT put this is a subroutine, otherwise the call to the subroutine prevents compilers from vectorizing the outer loop
-! here we assume that N_SLS == 3 in order to be able to unroll and suppress the loop in order to vectorize the outer loop
- R_xx_val = R_xx(1,ijk,1,1,ispec)
- R_yy_val = R_yy(1,ijk,1,1,ispec)
+ if(ATTENUATION_VAL .and. .not. PARTIAL_PHYS_DISPERSION_ONLY_VAL ) then
+#ifdef FORCE_VECTORIZATION
+ ! do NOT put this in a subroutine, otherwise the call to the subroutine prevents compilers
+ ! from vectorizing the outer loop
+
+ ! here we assume that N_SLS == 3 in order to be able to unroll and suppress the loop
+ ! in order to vectorize the outer loop
+ R_xx_val = R_xx(1,INDEX_IJK,ispec)
+ R_yy_val = R_yy(1,INDEX_IJK,ispec)
sigma_xx = sigma_xx - R_xx_val
sigma_yy = sigma_yy - R_yy_val
sigma_zz = sigma_zz + R_xx_val + R_yy_val
- sigma_xy = sigma_xy - R_xy(1,ijk,1,1,ispec)
- sigma_xz = sigma_xz - R_xz(1,ijk,1,1,ispec)
- sigma_yz = sigma_yz - R_yz(1,ijk,1,1,ispec)
+ sigma_xy = sigma_xy - R_xy(1,INDEX_IJK,ispec)
+ sigma_xz = sigma_xz - R_xz(1,INDEX_IJK,ispec)
+ sigma_yz = sigma_yz - R_yz(1,INDEX_IJK,ispec)
- R_xx_val = R_xx(2,ijk,1,1,ispec)
- R_yy_val = R_yy(2,ijk,1,1,ispec)
+ R_xx_val = R_xx(2,INDEX_IJK,ispec)
+ R_yy_val = R_yy(2,INDEX_IJK,ispec)
sigma_xx = sigma_xx - R_xx_val
sigma_yy = sigma_yy - R_yy_val
sigma_zz = sigma_zz + R_xx_val + R_yy_val
- sigma_xy = sigma_xy - R_xy(2,ijk,1,1,ispec)
- sigma_xz = sigma_xz - R_xz(2,ijk,1,1,ispec)
- sigma_yz = sigma_yz - R_yz(2,ijk,1,1,ispec)
+ sigma_xy = sigma_xy - R_xy(2,INDEX_IJK,ispec)
+ sigma_xz = sigma_xz - R_xz(2,INDEX_IJK,ispec)
+ sigma_yz = sigma_yz - R_yz(2,INDEX_IJK,ispec)
R_xx_val = R_xx(3,ijk,1,1,ispec)
R_yy_val = R_yy(3,ijk,1,1,ispec)
sigma_xx = sigma_xx - R_xx_val
sigma_yy = sigma_yy - R_yy_val
sigma_zz = sigma_zz + R_xx_val + R_yy_val
- sigma_xy = sigma_xy - R_xy(3,ijk,1,1,ispec)
- sigma_xz = sigma_xz - R_xz(3,ijk,1,1,ispec)
- sigma_yz = sigma_yz - R_yz(3,ijk,1,1,ispec)
+ sigma_xy = sigma_xy - R_xy(3,INDEX_IJK,ispec)
+ sigma_xz = sigma_xz - R_xz(3,INDEX_IJK,ispec)
+ sigma_yz = sigma_yz - R_yz(3,INDEX_IJK,ispec)
+#else
+ ! note: function inlining is generally done by fortran compilers;
+ ! compilers decide based on performance heuristics
+ ! note: Fortran passes pointers to array location, thus R_memory(1,1,...) is fine
+ call compute_element_att_stress(R_xx(1,INDEX_IJK,ispec), &
+ R_yy(1,INDEX_IJK,ispec), &
+ R_xy(1,INDEX_IJK,ispec), &
+ R_xz(1,INDEX_IJK,ispec), &
+ R_yz(1,INDEX_IJK,ispec), &
+ sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz)
+#endif
+
endif ! ATTENUATION_VAL
! define symmetric components of sigma for gravity
@@ -1700,7 +871,7 @@
if(GRAVITY_VAL) then
! use mesh coordinates to get theta and phi
! x y and z contain r theta and phi
- iglob = ibool(ijk,1,1,ispec)
+ iglob = ibool(INDEX_IJK,ispec)
dtheta = dble(ystore(iglob))
dphi = dble(zstore(iglob))
@@ -1741,440 +912,89 @@
Hxzl = cos_phi*cos_theta*minus_dg_plus_g_over_radius*sin_theta
Hyzl = cos_theta*minus_dg_plus_g_over_radius*sin_phi*sin_theta
- ! get displacement and multiply by density to compute G tensor
- sx_l = rho * dummyx_loc(ijk,1,1)
- sy_l = rho * dummyy_loc(ijk,1,1)
- sz_l = rho * dummyz_loc(ijk,1,1)
+ ! distinguish between single and double precision for reals
+ if(CUSTOM_REAL == SIZE_REAL) then
- ! compute G tensor from s . g and add to sigma (not symmetric)
- sigma_xx = sigma_xx + sy_l*gyl + sz_l*gzl
- sigma_yy = sigma_yy + sx_l*gxl + sz_l*gzl
- sigma_zz = sigma_zz + sx_l*gxl + sy_l*gyl
+ ! get displacement and multiply by density to compute G tensor
+ sx_l = rho * dble(dummyx_loc(INDEX_IJK))
+ sy_l = rho * dble(dummyy_loc(INDEX_IJK))
+ sz_l = rho * dble(dummyz_loc(INDEX_IJK))
- sigma_xy = sigma_xy - sx_l * gyl
- sigma_yx = sigma_yx - sy_l * gxl
+ ! compute G tensor from s . g and add to sigma (not symmetric)
+ sigma_xx = sigma_xx + sngl(sy_l*gyl + sz_l*gzl)
+ sigma_yy = sigma_yy + sngl(sx_l*gxl + sz_l*gzl)
+ sigma_zz = sigma_zz + sngl(sx_l*gxl + sy_l*gyl)
- sigma_xz = sigma_xz - sx_l * gzl
- sigma_zx = sigma_zx - sz_l * gxl
+ sigma_xy = sigma_xy - sngl(sx_l * gyl)
+ sigma_yx = sigma_yx - sngl(sy_l * gxl)
- sigma_yz = sigma_yz - sy_l * gzl
- sigma_zy = sigma_zy - sz_l * gyl
+ sigma_xz = sigma_xz - sngl(sx_l * gzl)
+ sigma_zx = sigma_zx - sngl(sz_l * gxl)
- ! precompute vector
- factor = jacobianl * wgll_cube(ijk,1,1)
- rho_s_H(1,ijk,1,1) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
- rho_s_H(2,ijk,1,1) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
- rho_s_H(3,ijk,1,1) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
- endif ! end of section with gravity terms
+ sigma_yz = sigma_yz - sngl(sy_l * gzl)
+ sigma_zy = sigma_zy - sngl(sz_l * gyl)
- ! form dot product with test vector, non-symmetric form
- tempx1(ijk,1,1) = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl) ! this goes to accel_x
- tempy1(ijk,1,1) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl) ! this goes to accel_y
- tempz1(ijk,1,1) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl) ! this goes to accel_z
+ ! precompute vector
+ factor = dble(jacobianl) * wgll_cube(INDEX_IJK)
+ rho_s_H(1,INDEX_IJK) = sngl(factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl))
+ rho_s_H(2,INDEX_IJK) = sngl(factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl))
+ rho_s_H(3,INDEX_IJK) = sngl(factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl))
- tempx2(ijk,1,1) = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl) ! this goes to accel_x
- tempy2(ijk,1,1) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl) ! this goes to accel_y
- tempz2(ijk,1,1) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl) ! this goes to accel_z
+ else
- tempx3(ijk,1,1) = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl) ! this goes to accel_x
- tempy3(ijk,1,1) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl) ! this goes to accel_y
- tempz3(ijk,1,1) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl) ! this goes to accel_z
- enddo
+ ! get displacement and multiply by density to compute G tensor
+ sx_l = rho * dummyx_loc(INDEX_IJK)
+ sy_l = rho * dummyy_loc(INDEX_IJK)
+ sz_l = rho * dummyz_loc(INDEX_IJK)
- end subroutine compute_element_tiso
+ ! compute G tensor from s . g and add to sigma (not symmetric)
+ sigma_xx = sigma_xx + sy_l*gyl + sz_l*gzl
+ sigma_yy = sigma_yy + sx_l*gxl + sz_l*gzl
+ sigma_zz = sigma_zz + sx_l*gxl + sy_l*gyl
-! end of FORCE_VECTORIZATION
-#endif
+ sigma_xy = sigma_xy - sx_l * gyl
+ sigma_yx = sigma_yx - sy_l * gxl
+ sigma_xz = sigma_xz - sx_l * gzl
+ sigma_zx = sigma_zx - sz_l * gxl
-!--------------------------------------------------------------------------------------------
-!
-! anisotropic element
-!
-!--------------------------------------------------------------------------------------------
+ sigma_yz = sigma_yz - sy_l * gzl
+ sigma_zy = sigma_zy - sz_l * gyl
-#ifndef FORCE_VECTORIZATION
+ ! precompute vector
+ factor = jacobianl * wgll_cube(INDEX_IJK)
+ rho_s_H(1,INDEX_IJK) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
+ rho_s_H(2,INDEX_IJK) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
+ rho_s_H(3,INDEX_IJK) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
- ! default, without vectorization
-
- subroutine compute_element_aniso(ispec, &
- minus_gravity_table,density_table,minus_deriv_gravity_table, &
- xstore,ystore,zstore, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- wgll_cube, &
- c11store,c12store,c13store,c14store,c15store,c16store,c22store, &
- c23store,c24store,c25store,c26store,c33store,c34store,c35store, &
- c36store,c44store,c45store,c46store,c55store,c56store,c66store, &
- ibool, &
- R_xx,R_yy,R_xy,R_xz,R_yz, &
- epsilon_trace_over_3, &
- one_minus_sum_beta,vnspec, &
- tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
- dummyx_loc,dummyy_loc,dummyz_loc, &
- epsilondev_loc,rho_s_H,is_backward_field)
-
- use constants_solver
- use specfem_par,only: COMPUTE_AND_STORE_STRAIN
-
- implicit none
-
- ! element id
- integer :: ispec
-
- ! arrays with mesh parameters per slice
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: ibool
-
- ! x y and z contain r theta and phi
- real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE) :: xstore,ystore,zstore
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
-
- ! array with derivatives of Lagrange polynomials and precalculated products
- double precision, dimension(NGLLX,NGLLY,NGLLZ) :: wgll_cube
-
- ! arrays for full anisotropy only when needed
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ANISO_MANTLE) :: &
- c11store,c12store,c13store,c14store,c15store,c16store,c22store, &
- c23store,c24store,c25store,c26store,c33store,c34store,c35store, &
- c36store,c44store,c45store,c46store,c55store,c56store,c66store
-
- ! attenuation
- ! memory variables for attenuation
- ! memory variables R_ij are stored at the local rather than global level
- ! to allow for optimization of cache access by compiler
- real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUATION) :: R_xx,R_yy,R_xy,R_xz,R_yz
-
- real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: epsilon_trace_over_3
-
- ! variable sized array variables
- integer :: vnspec
-
- ! [alpha,beta,gamma]val reduced to N_SLS to N_SLS*NUM_NODES
- real(kind=CUSTOM_REAL), dimension(ATT1_VAL,ATT2_VAL,ATT3_VAL,vnspec) :: one_minus_sum_beta
-
- ! gravity
- double precision, dimension(NRAD_GRAVITY) :: minus_gravity_table,density_table,minus_deriv_gravity_table
-
- ! element info
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
- tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc,dummyy_loc,dummyz_loc
-
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: rho_s_H
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
-
- logical :: is_backward_field
-
-! local parameters
- ! real(kind=CUSTOM_REAL) one_minus_sum_beta_use
- real(kind=CUSTOM_REAL) minus_sum_beta,mul
- ! the 21 coefficients for an anisotropic medium in reduced notation
- real(kind=CUSTOM_REAL) c11,c22,c33,c44,c55,c66,c12,c13,c23,c14,c24,c34,c15,c25,c35,c45,c16,c26,c36,c46,c56
-
- real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
- real(kind=CUSTOM_REAL) duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl
- real(kind=CUSTOM_REAL) duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl
- real(kind=CUSTOM_REAL) duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
- real(kind=CUSTOM_REAL) sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz
-
- real(kind=CUSTOM_REAL) templ
-
- ! for gravity
- double precision dphi,dtheta
- double precision radius,rho,minus_g,minus_dg
- double precision minus_g_over_radius,minus_dg_plus_g_over_radius
- double precision cos_theta,sin_theta,cos_phi,sin_phi
- double precision cos_theta_sq,sin_theta_sq,cos_phi_sq,sin_phi_sq
- double precision factor,sx_l,sy_l,sz_l,gxl,gyl,gzl
- double precision Hxxl,Hyyl,Hzzl,Hxyl,Hxzl,Hyzl
- real(kind=CUSTOM_REAL) sigma_yx,sigma_zx,sigma_zy
-
- integer :: i,j,k
- integer :: int_radius
- integer :: iglob
-
- logical :: dummyl
-
-!daniel debug
- ! dummy to avoid compiler warning
- dummyl = is_backward_field
-
- ! anisotropic elements
-
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- ! get derivatives of ux, uy and uz with respect to x, y and z
- xixl = xix(i,j,k,ispec)
- xiyl = xiy(i,j,k,ispec)
- xizl = xiz(i,j,k,ispec)
- etaxl = etax(i,j,k,ispec)
- etayl = etay(i,j,k,ispec)
- etazl = etaz(i,j,k,ispec)
- gammaxl = gammax(i,j,k,ispec)
- gammayl = gammay(i,j,k,ispec)
- gammazl = gammaz(i,j,k,ispec)
-
- ! compute the jacobian
- jacobianl = 1.0_CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
- - xiyl*(etaxl*gammazl-etazl*gammaxl) &
- + xizl*(etaxl*gammayl-etayl*gammaxl))
-
- duxdxl = xixl*tempx1(i,j,k) + etaxl*tempx2(i,j,k) + gammaxl*tempx3(i,j,k)
- duxdyl = xiyl*tempx1(i,j,k) + etayl*tempx2(i,j,k) + gammayl*tempx3(i,j,k)
- duxdzl = xizl*tempx1(i,j,k) + etazl*tempx2(i,j,k) + gammazl*tempx3(i,j,k)
-
- duydxl = xixl*tempy1(i,j,k) + etaxl*tempy2(i,j,k) + gammaxl*tempy3(i,j,k)
- duydyl = xiyl*tempy1(i,j,k) + etayl*tempy2(i,j,k) + gammayl*tempy3(i,j,k)
- duydzl = xizl*tempy1(i,j,k) + etazl*tempy2(i,j,k) + gammazl*tempy3(i,j,k)
-
- duzdxl = xixl*tempz1(i,j,k) + etaxl*tempz2(i,j,k) + gammaxl*tempz3(i,j,k)
- duzdyl = xiyl*tempz1(i,j,k) + etayl*tempz2(i,j,k) + gammayl*tempz3(i,j,k)
- duzdzl = xizl*tempz1(i,j,k) + etazl*tempz2(i,j,k) + gammazl*tempz3(i,j,k)
-
- ! precompute some sums to save CPU time
- duxdxl_plus_duydyl = duxdxl + duydyl
- duxdxl_plus_duzdzl = duxdxl + duzdzl
- duydyl_plus_duzdzl = duydyl + duzdzl
- duxdyl_plus_duydxl = duxdyl + duydxl
- duzdxl_plus_duxdzl = duzdxl + duxdzl
- duzdyl_plus_duydzl = duzdyl + duydzl
-
-!ZN beware, here the expression differs from the strain used in memory variable equation (6) in D. Komatitsch and J. Tromp 1999,
-!ZN here Brian Savage uses the engineering strain which are epsilon = 1/2*(grad U + (grad U)^T),
-!ZN where U is the displacement vector and grad the gradient operator, i.e. there is a 1/2 factor difference between the two.
-!ZN Both expressions are fine, but we need to keep in mind that if we put the 1/2 factor here there we need to remove it
-!ZN from the expression in which we use the strain later in the code.
-
- ! compute deviatoric strain
- if (COMPUTE_AND_STORE_STRAIN) then
- templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
- if(NSPEC_CRUST_MANTLE_STRAIN_ONLY == 1) then
- if( ispec == 1) then
- epsilon_trace_over_3(i,j,k,1) = templ
- endif
- else
- epsilon_trace_over_3(i,j,k,ispec) = templ
- endif
- epsilondev_loc(1,i,j,k) = duxdxl - templ
- epsilondev_loc(2,i,j,k) = duydyl - templ
- epsilondev_loc(3,i,j,k) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl
- epsilondev_loc(4,i,j,k) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl
- epsilondev_loc(5,i,j,k) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl
endif
- !
- ! compute anisotropic elements
- !
+ endif ! end of section with gravity terms
- c11 = c11store(i,j,k,ispec)
- c12 = c12store(i,j,k,ispec)
- c13 = c13store(i,j,k,ispec)
- c14 = c14store(i,j,k,ispec)
- c15 = c15store(i,j,k,ispec)
- c16 = c16store(i,j,k,ispec)
- c22 = c22store(i,j,k,ispec)
- c23 = c23store(i,j,k,ispec)
- c24 = c24store(i,j,k,ispec)
- c25 = c25store(i,j,k,ispec)
- c26 = c26store(i,j,k,ispec)
- c33 = c33store(i,j,k,ispec)
- c34 = c34store(i,j,k,ispec)
- c35 = c35store(i,j,k,ispec)
- c36 = c36store(i,j,k,ispec)
- c44 = c44store(i,j,k,ispec)
- c45 = c45store(i,j,k,ispec)
- c46 = c46store(i,j,k,ispec)
- c55 = c55store(i,j,k,ispec)
- c56 = c56store(i,j,k,ispec)
- c66 = c66store(i,j,k,ispec)
+ ! form dot product with test vector, non-symmetric form
+ tempx1(INDEX_IJK) = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl) ! this goes to accel_x
+ tempy1(INDEX_IJK) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl) ! this goes to accel_y
+ tempz1(INDEX_IJK) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl) ! this goes to accel_z
- if( ATTENUATION_VAL ) then
- ! precompute terms for attenuation if needed
- if( ATTENUATION_3D_VAL .or. ATTENUATION_1D_WITH_3D_STORAGE_VAL ) then
- minus_sum_beta = one_minus_sum_beta(i,j,k,ispec) - 1.0_CUSTOM_REAL
- else
- minus_sum_beta = one_minus_sum_beta(1,1,1,ispec) - 1.0_CUSTOM_REAL
- endif
- mul = c44 * minus_sum_beta
- c11 = c11 + FOUR_THIRDS * mul ! * minus_sum_beta * mul
- c12 = c12 - TWO_THIRDS * mul
- c13 = c13 - TWO_THIRDS * mul
- c22 = c22 + FOUR_THIRDS * mul
- c23 = c23 - TWO_THIRDS * mul
- c33 = c33 + FOUR_THIRDS * mul
- c44 = c44 + mul
- c55 = c55 + mul
- c66 = c66 + mul
- endif
+ tempx2(INDEX_IJK) = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl) ! this goes to accel_x
+ tempy2(INDEX_IJK) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl) ! this goes to accel_y
+ tempz2(INDEX_IJK) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl) ! this goes to accel_z
- sigma_xx = c11*duxdxl + c16*duxdyl_plus_duydxl + c12*duydyl + &
- c15*duzdxl_plus_duxdzl + c14*duzdyl_plus_duydzl + c13*duzdzl
+ tempx3(INDEX_IJK) = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl) ! this goes to accel_x
+ tempy3(INDEX_IJK) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl) ! this goes to accel_y
+ tempz3(INDEX_IJK) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl) ! this goes to accel_z
- sigma_yy = c12*duxdxl + c26*duxdyl_plus_duydxl + c22*duydyl + &
- c25*duzdxl_plus_duxdzl + c24*duzdyl_plus_duydzl + c23*duzdzl
+ ENDDO_LOOP_IJK
- sigma_zz = c13*duxdxl + c36*duxdyl_plus_duydxl + c23*duydyl + &
- c35*duzdxl_plus_duxdzl + c34*duzdyl_plus_duydzl + c33*duzdzl
+ end subroutine compute_element_tiso
- sigma_xy = c16*duxdxl + c66*duxdyl_plus_duydxl + c26*duydyl + &
- c56*duzdxl_plus_duxdzl + c46*duzdyl_plus_duydzl + c36*duzdzl
- sigma_xz = c15*duxdxl + c56*duxdyl_plus_duydxl + c25*duydyl + &
- c55*duzdxl_plus_duxdzl + c45*duzdyl_plus_duydzl + c35*duzdzl
+!--------------------------------------------------------------------------------------------
+!
+! anisotropic element
+!
+!--------------------------------------------------------------------------------------------
- sigma_yz = c14*duxdxl + c46*duxdyl_plus_duydxl + c24*duydyl + &
- c45*duzdxl_plus_duxdzl + c44*duzdyl_plus_duydzl + c34*duzdzl
-
- ! subtract memory variables if attenuation
- if(ATTENUATION_VAL .and. .not. PARTIAL_PHYS_DISPERSION_ONLY_VAL ) then
- ! note: function inlining is generally done by fortran compilers;
- ! compilers decide based on performance heuristics
-
- ! note: Fortran passes pointers to array location, thus R_memory(1,1,...) is fine
- call compute_element_att_stress(R_xx(1,i,j,k,ispec), &
- R_yy(1,i,j,k,ispec), &
- R_xy(1,i,j,k,ispec), &
- R_xz(1,i,j,k,ispec), &
- R_yz(1,i,j,k,ispec), &
- sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz)
-
- endif ! ATTENUATION_VAL
-
- ! define symmetric components of sigma for gravity
- sigma_yx = sigma_xy
- sigma_zx = sigma_xz
- sigma_zy = sigma_yz
-
- ! compute non-symmetric terms for gravity
- if(GRAVITY_VAL) then
- ! use mesh coordinates to get theta and phi
- ! x y and z contain r theta and phi
- iglob = ibool(i,j,k,ispec)
-
- dtheta = dble(ystore(iglob))
- dphi = dble(zstore(iglob))
- radius = dble(xstore(iglob))
-
- cos_theta = dcos(dtheta)
- sin_theta = dsin(dtheta)
- cos_phi = dcos(dphi)
- sin_phi = dsin(dphi)
-
- cos_theta_sq = cos_theta*cos_theta
- sin_theta_sq = sin_theta*sin_theta
- cos_phi_sq = cos_phi*cos_phi
- sin_phi_sq = sin_phi*sin_phi
-
- ! get g, rho and dg/dr=dg
- ! spherical components of the gravitational acceleration
- ! for efficiency replace with lookup table every 100 m in radial direction
- int_radius = nint(10.d0 * radius * R_EARTH_KM )
- minus_g = minus_gravity_table(int_radius)
- minus_dg = minus_deriv_gravity_table(int_radius)
- rho = density_table(int_radius)
-
- ! Cartesian components of the gravitational acceleration
- gxl = minus_g*sin_theta*cos_phi
- gyl = minus_g*sin_theta*sin_phi
- gzl = minus_g*cos_theta
-
- ! Cartesian components of gradient of gravitational acceleration
- ! obtained from spherical components
- minus_g_over_radius = minus_g / radius
- minus_dg_plus_g_over_radius = minus_dg - minus_g_over_radius
-
- Hxxl = minus_g_over_radius*(cos_phi_sq*cos_theta_sq + sin_phi_sq) + cos_phi_sq*minus_dg*sin_theta_sq
- Hyyl = minus_g_over_radius*(cos_phi_sq + cos_theta_sq*sin_phi_sq) + minus_dg*sin_phi_sq*sin_theta_sq
- Hzzl = cos_theta_sq*minus_dg + minus_g_over_radius*sin_theta_sq
- Hxyl = cos_phi*minus_dg_plus_g_over_radius*sin_phi*sin_theta_sq
- Hxzl = cos_phi*cos_theta*minus_dg_plus_g_over_radius*sin_theta
- Hyzl = cos_theta*minus_dg_plus_g_over_radius*sin_phi*sin_theta
-
- ! distinguish between single and double precision for reals
- if(CUSTOM_REAL == SIZE_REAL) then
-
- ! get displacement and multiply by density to compute G tensor
- sx_l = rho * dble(dummyx_loc(i,j,k)) ! dble(displ_crust_mantle(1,iglob))
- sy_l = rho * dble(dummyy_loc(i,j,k)) ! dble(displ_crust_mantle(2,iglob))
- sz_l = rho * dble(dummyz_loc(i,j,k)) ! dble(displ_crust_mantle(3,iglob))
-
- ! compute G tensor from s . g and add to sigma (not symmetric)
- sigma_xx = sigma_xx + sngl(sy_l*gyl + sz_l*gzl)
- sigma_yy = sigma_yy + sngl(sx_l*gxl + sz_l*gzl)
- sigma_zz = sigma_zz + sngl(sx_l*gxl + sy_l*gyl)
-
- sigma_xy = sigma_xy - sngl(sx_l * gyl)
- sigma_yx = sigma_yx - sngl(sy_l * gxl)
-
- sigma_xz = sigma_xz - sngl(sx_l * gzl)
- sigma_zx = sigma_zx - sngl(sz_l * gxl)
-
- sigma_yz = sigma_yz - sngl(sy_l * gzl)
- sigma_zy = sigma_zy - sngl(sz_l * gyl)
-
- ! precompute vector
- factor = dble(jacobianl) * wgll_cube(i,j,k)
- rho_s_H(1,i,j,k) = sngl(factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl))
- rho_s_H(2,i,j,k) = sngl(factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl))
- rho_s_H(3,i,j,k) = sngl(factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl))
-
- else
-
- ! get displacement and multiply by density to compute G tensor
- sx_l = rho * dummyx_loc(i,j,k) ! displ_crust_mantle(1,iglob)
- sy_l = rho * dummyy_loc(i,j,k) ! displ_crust_mantle(2,iglob)
- sz_l = rho * dummyz_loc(i,j,k) ! displ_crust_mantle(3,iglob)
-
- ! compute G tensor from s . g and add to sigma (not symmetric)
- sigma_xx = sigma_xx + sy_l*gyl + sz_l*gzl
- sigma_yy = sigma_yy + sx_l*gxl + sz_l*gzl
- sigma_zz = sigma_zz + sx_l*gxl + sy_l*gyl
-
- sigma_xy = sigma_xy - sx_l * gyl
- sigma_yx = sigma_yx - sy_l * gxl
-
- sigma_xz = sigma_xz - sx_l * gzl
- sigma_zx = sigma_zx - sz_l * gxl
-
- sigma_yz = sigma_yz - sy_l * gzl
- sigma_zy = sigma_zy - sz_l * gyl
-
- ! precompute vector
- factor = jacobianl * wgll_cube(i,j,k)
- rho_s_H(1,i,j,k) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
- rho_s_H(2,i,j,k) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
- rho_s_H(3,i,j,k) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
-
- endif
-
- endif ! end of section with gravity terms
-
- ! form dot product with test vector, non-symmetric form
- tempx1(i,j,k) = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl) ! this goes to accel_x
- tempy1(i,j,k) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl) ! this goes to accel_y
- tempz1(i,j,k) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl) ! this goes to accel_z
-
- tempx2(i,j,k) = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl) ! this goes to accel_x
- tempy2(i,j,k) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl) ! this goes to accel_y
- tempz2(i,j,k) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl) ! this goes to accel_z
-
- tempx3(i,j,k) = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl) ! this goes to accel_x
- tempy3(i,j,k) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl) ! this goes to accel_y
- tempz3(i,j,k) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl) ! this goes to accel_z
-
- enddo ! NGLLX
- enddo ! NGLLY
- enddo ! NGLLZ
-
- end subroutine compute_element_aniso
-
-
-#else
-
- ! FORCE_VECTORIZATION
-
subroutine compute_element_aniso(ispec, &
minus_gravity_table,density_table,minus_deriv_gravity_table, &
xstore,ystore,zstore, &
@@ -2189,7 +1009,7 @@
one_minus_sum_beta,vnspec, &
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
dummyx_loc,dummyy_loc,dummyz_loc, &
- epsilondev_loc,rho_s_H,is_backward_field)
+ epsilondev_loc,rho_s_H)
use constants_solver
use specfem_par,only: COMPUTE_AND_STORE_STRAIN
@@ -2242,10 +1062,7 @@
real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: rho_s_H
real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
- logical :: is_backward_field
-
-! local parameters
- ! real(kind=CUSTOM_REAL) one_minus_sum_beta_use
+ ! local parameters
real(kind=CUSTOM_REAL) minus_sum_beta,mul
! the 21 coefficients for an anisotropic medium in reduced notation
real(kind=CUSTOM_REAL) c11,c22,c33,c44,c55,c66,c12,c13,c23,c14,c24,c34,c15,c25,c35,c45,c16,c26,c36,c46,c56
@@ -2271,50 +1088,47 @@
integer :: int_radius
integer :: iglob
- logical :: dummyl
-
- ! force vectorization
- integer :: ijk
- real(kind=CUSTOM_REAL) R_xx_val,R_yy_val
-
+#ifdef FORCE_VECTORIZATION
! in this vectorized version we have to assume that N_SLS == 3 in order to be able to unroll and thus suppress
! an inner loop that would otherwise prevent vectorization; this is safe in practice in all cases because N_SLS == 3
-! in all known applications, and in the main program we check that N_SLS == 3 if FORCE_VECTORIZATION is used and we stop
+! in all known applications, and in the main program we check that N_SLS == 3 if FORCE_VECTORIZATION is used and we stop
+ integer :: ijk
+ real(kind=CUSTOM_REAL) :: R_xx_val,R_yy_val
+#else
+ integer :: i,j,k
+#endif
-!daniel debug
- ! dummy to avoid compiler warning
- dummyl = is_backward_field
+ ! anisotropic elements
- ! anisotropic elements
+ DO_LOOP_IJK
- do ijk=1,NGLLCUBE
! get derivatives of ux, uy and uz with respect to x, y and z
- xixl = xix(ijk,1,1,ispec)
- xiyl = xiy(ijk,1,1,ispec)
- xizl = xiz(ijk,1,1,ispec)
- etaxl = etax(ijk,1,1,ispec)
- etayl = etay(ijk,1,1,ispec)
- etazl = etaz(ijk,1,1,ispec)
- gammaxl = gammax(ijk,1,1,ispec)
- gammayl = gammay(ijk,1,1,ispec)
- gammazl = gammaz(ijk,1,1,ispec)
+ xixl = xix(INDEX_IJK,ispec)
+ xiyl = xiy(INDEX_IJK,ispec)
+ xizl = xiz(INDEX_IJK,ispec)
+ etaxl = etax(INDEX_IJK,ispec)
+ etayl = etay(INDEX_IJK,ispec)
+ etazl = etaz(INDEX_IJK,ispec)
+ gammaxl = gammax(INDEX_IJK,ispec)
+ gammayl = gammay(INDEX_IJK,ispec)
+ gammazl = gammaz(INDEX_IJK,ispec)
! compute the jacobian
jacobianl = 1.0_CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
- xiyl*(etaxl*gammazl-etazl*gammaxl) &
+ xizl*(etaxl*gammayl-etayl*gammaxl))
- duxdxl = xixl*tempx1(ijk,1,1) + etaxl*tempx2(ijk,1,1) + gammaxl*tempx3(ijk,1,1)
- duxdyl = xiyl*tempx1(ijk,1,1) + etayl*tempx2(ijk,1,1) + gammayl*tempx3(ijk,1,1)
- duxdzl = xizl*tempx1(ijk,1,1) + etazl*tempx2(ijk,1,1) + gammazl*tempx3(ijk,1,1)
+ duxdxl = xixl*tempx1(INDEX_IJK) + etaxl*tempx2(INDEX_IJK) + gammaxl*tempx3(INDEX_IJK)
+ duxdyl = xiyl*tempx1(INDEX_IJK) + etayl*tempx2(INDEX_IJK) + gammayl*tempx3(INDEX_IJK)
+ duxdzl = xizl*tempx1(INDEX_IJK) + etazl*tempx2(INDEX_IJK) + gammazl*tempx3(INDEX_IJK)
- duydxl = xixl*tempy1(ijk,1,1) + etaxl*tempy2(ijk,1,1) + gammaxl*tempy3(ijk,1,1)
- duydyl = xiyl*tempy1(ijk,1,1) + etayl*tempy2(ijk,1,1) + gammayl*tempy3(ijk,1,1)
- duydzl = xizl*tempy1(ijk,1,1) + etazl*tempy2(ijk,1,1) + gammazl*tempy3(ijk,1,1)
+ duydxl = xixl*tempy1(INDEX_IJK) + etaxl*tempy2(INDEX_IJK) + gammaxl*tempy3(INDEX_IJK)
+ duydyl = xiyl*tempy1(INDEX_IJK) + etayl*tempy2(INDEX_IJK) + gammayl*tempy3(INDEX_IJK)
+ duydzl = xizl*tempy1(INDEX_IJK) + etazl*tempy2(INDEX_IJK) + gammazl*tempy3(INDEX_IJK)
- duzdxl = xixl*tempz1(ijk,1,1) + etaxl*tempz2(ijk,1,1) + gammaxl*tempz3(ijk,1,1)
- duzdyl = xiyl*tempz1(ijk,1,1) + etayl*tempz2(ijk,1,1) + gammayl*tempz3(ijk,1,1)
- duzdzl = xizl*tempz1(ijk,1,1) + etazl*tempz2(ijk,1,1) + gammazl*tempz3(ijk,1,1)
+ duzdxl = xixl*tempz1(INDEX_IJK) + etaxl*tempz2(INDEX_IJK) + gammaxl*tempz3(INDEX_IJK)
+ duzdyl = xiyl*tempz1(INDEX_IJK) + etayl*tempz2(INDEX_IJK) + gammayl*tempz3(INDEX_IJK)
+ duzdzl = xizl*tempz1(INDEX_IJK) + etazl*tempz2(INDEX_IJK) + gammazl*tempz3(INDEX_IJK)
! precompute some sums to save CPU time
duxdxl_plus_duydyl = duxdxl + duydyl
@@ -2335,48 +1149,48 @@
templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
if(NSPEC_CRUST_MANTLE_STRAIN_ONLY == 1) then
if( ispec == 1) then
- epsilon_trace_over_3(ijk,1,1,1) = templ
+ epsilon_trace_over_3(INDEX_IJK,1) = templ
endif
else
- epsilon_trace_over_3(ijk,1,1,ispec) = templ
+ epsilon_trace_over_3(INDEX_IJK,ispec) = templ
endif
- epsilondev_loc(1,ijk,1,1) = duxdxl - templ
- epsilondev_loc(2,ijk,1,1) = duydyl - templ
- epsilondev_loc(3,ijk,1,1) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl
- epsilondev_loc(4,ijk,1,1) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl
- epsilondev_loc(5,ijk,1,1) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl
+ epsilondev_loc(1,INDEX_IJK) = duxdxl - templ
+ epsilondev_loc(2,INDEX_IJK) = duydyl - templ
+ epsilondev_loc(3,INDEX_IJK) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl
+ epsilondev_loc(4,INDEX_IJK) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl
+ epsilondev_loc(5,INDEX_IJK) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl
endif
!
! compute anisotropic elements
!
- c11 = c11store(ijk,1,1,ispec)
- c12 = c12store(ijk,1,1,ispec)
- c13 = c13store(ijk,1,1,ispec)
- c14 = c14store(ijk,1,1,ispec)
- c15 = c15store(ijk,1,1,ispec)
- c16 = c16store(ijk,1,1,ispec)
- c22 = c22store(ijk,1,1,ispec)
- c23 = c23store(ijk,1,1,ispec)
- c24 = c24store(ijk,1,1,ispec)
- c25 = c25store(ijk,1,1,ispec)
- c26 = c26store(ijk,1,1,ispec)
- c33 = c33store(ijk,1,1,ispec)
- c34 = c34store(ijk,1,1,ispec)
- c35 = c35store(ijk,1,1,ispec)
- c36 = c36store(ijk,1,1,ispec)
- c44 = c44store(ijk,1,1,ispec)
- c45 = c45store(ijk,1,1,ispec)
- c46 = c46store(ijk,1,1,ispec)
- c55 = c55store(ijk,1,1,ispec)
- c56 = c56store(ijk,1,1,ispec)
- c66 = c66store(ijk,1,1,ispec)
+ c11 = c11store(INDEX_IJK,ispec)
+ c12 = c12store(INDEX_IJK,ispec)
+ c13 = c13store(INDEX_IJK,ispec)
+ c14 = c14store(INDEX_IJK,ispec)
+ c15 = c15store(INDEX_IJK,ispec)
+ c16 = c16store(INDEX_IJK,ispec)
+ c22 = c22store(INDEX_IJK,ispec)
+ c23 = c23store(INDEX_IJK,ispec)
+ c24 = c24store(INDEX_IJK,ispec)
+ c25 = c25store(INDEX_IJK,ispec)
+ c26 = c26store(INDEX_IJK,ispec)
+ c33 = c33store(INDEX_IJK,ispec)
+ c34 = c34store(INDEX_IJK,ispec)
+ c35 = c35store(INDEX_IJK,ispec)
+ c36 = c36store(INDEX_IJK,ispec)
+ c44 = c44store(INDEX_IJK,ispec)
+ c45 = c45store(INDEX_IJK,ispec)
+ c46 = c46store(INDEX_IJK,ispec)
+ c55 = c55store(INDEX_IJK,ispec)
+ c56 = c56store(INDEX_IJK,ispec)
+ c66 = c66store(INDEX_IJK,ispec)
- if(ATTENUATION_VAL) then
+ if( ATTENUATION_VAL ) then
! precompute terms for attenuation if needed
if( ATTENUATION_3D_VAL .or. ATTENUATION_1D_WITH_3D_STORAGE_VAL ) then
- minus_sum_beta = one_minus_sum_beta(ijk,1,1,ispec) - 1.0_CUSTOM_REAL
+ minus_sum_beta = one_minus_sum_beta(INDEX_IJK,ispec) - 1.0_CUSTOM_REAL
else
minus_sum_beta = one_minus_sum_beta(1,1,1,ispec) - 1.0_CUSTOM_REAL
endif
@@ -2411,35 +1225,51 @@
c45*duzdxl_plus_duxdzl + c44*duzdyl_plus_duydzl + c34*duzdzl
! subtract memory variables if attenuation
- if(ATTENUATION_VAL .and. .not. PARTIAL_PHYS_DISPERSION_ONLY_VAL) then
-! do NOT put this is a subroutine, otherwise the call to the subroutine prevents compilers from vectorizing the outer loop
-! here we assume that N_SLS == 3 in order to be able to unroll and suppress the loop in order to vectorize the outer loop
- R_xx_val = R_xx(1,ijk,1,1,ispec)
- R_yy_val = R_yy(1,ijk,1,1,ispec)
+ if(ATTENUATION_VAL .and. .not. PARTIAL_PHYS_DISPERSION_ONLY_VAL ) then
+#ifdef FORCE_VECTORIZATION
+ ! do NOT put this is a subroutine, otherwise the call to the subroutine prevents compilers
+ ! from vectorizing the outer loop
+
+ ! here we assume that N_SLS == 3 in order to be able to unroll and suppress the loop
+ ! in order to vectorize the outer loop
+ R_xx_val = R_xx(1,INDEX_IJK,ispec)
+ R_yy_val = R_yy(1,INDEX_IJK,ispec)
sigma_xx = sigma_xx - R_xx_val
sigma_yy = sigma_yy - R_yy_val
sigma_zz = sigma_zz + R_xx_val + R_yy_val
- sigma_xy = sigma_xy - R_xy(1,ijk,1,1,ispec)
- sigma_xz = sigma_xz - R_xz(1,ijk,1,1,ispec)
- sigma_yz = sigma_yz - R_yz(1,ijk,1,1,ispec)
+ sigma_xy = sigma_xy - R_xy(1,INDEX_IJK,ispec)
+ sigma_xz = sigma_xz - R_xz(1,INDEX_IJK,ispec)
+ sigma_yz = sigma_yz - R_yz(1,INDEX_IJK,ispec)
- R_xx_val = R_xx(2,ijk,1,1,ispec)
- R_yy_val = R_yy(2,ijk,1,1,ispec)
+ R_xx_val = R_xx(2,INDEX_IJK,ispec)
+ R_yy_val = R_yy(2,INDEX_IJK,ispec)
sigma_xx = sigma_xx - R_xx_val
sigma_yy = sigma_yy - R_yy_val
sigma_zz = sigma_zz + R_xx_val + R_yy_val
- sigma_xy = sigma_xy - R_xy(2,ijk,1,1,ispec)
- sigma_xz = sigma_xz - R_xz(2,ijk,1,1,ispec)
- sigma_yz = sigma_yz - R_yz(2,ijk,1,1,ispec)
+ sigma_xy = sigma_xy - R_xy(2,INDEX_IJK,ispec)
+ sigma_xz = sigma_xz - R_xz(2,INDEX_IJK,ispec)
+ sigma_yz = sigma_yz - R_yz(2,INDEX_IJK,ispec)
- R_xx_val = R_xx(3,ijk,1,1,ispec)
- R_yy_val = R_yy(3,ijk,1,1,ispec)
+ R_xx_val = R_xx(3,INDEX_IJK,ispec)
+ R_yy_val = R_yy(3,INDEX_IJK,ispec)
sigma_xx = sigma_xx - R_xx_val
sigma_yy = sigma_yy - R_yy_val
sigma_zz = sigma_zz + R_xx_val + R_yy_val
- sigma_xy = sigma_xy - R_xy(3,ijk,1,1,ispec)
- sigma_xz = sigma_xz - R_xz(3,ijk,1,1,ispec)
- sigma_yz = sigma_yz - R_yz(3,ijk,1,1,ispec)
+ sigma_xy = sigma_xy - R_xy(3,INDEX_IJK,ispec)
+ sigma_xz = sigma_xz - R_xz(3,INDEX_IJK,ispec)
+ sigma_yz = sigma_yz - R_yz(3,INDEX_IJK,ispec)
+#else
+ ! note: function inlining is generally done by fortran compilers;
+ ! compilers decide based on performance heuristics
+
+ ! note: Fortran passes pointers to array location, thus R_memory(1,1,...) is fine
+ call compute_element_att_stress(R_xx(1,INDEX_IJK,ispec), &
+ R_yy(1,INDEX_IJK,ispec), &
+ R_xy(1,INDEX_IJK,ispec), &
+ R_xz(1,INDEX_IJK,ispec), &
+ R_yz(1,INDEX_IJK,ispec), &
+ sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz)
+#endif
endif ! ATTENUATION_VAL
! define symmetric components of sigma for gravity
@@ -2451,7 +1281,7 @@
if(GRAVITY_VAL) then
! use mesh coordinates to get theta and phi
! x y and z contain r theta and phi
- iglob = ibool(ijk,1,1,ispec)
+ iglob = ibool(INDEX_IJK,ispec)
dtheta = dble(ystore(iglob))
dphi = dble(zstore(iglob))
@@ -2492,50 +1322,82 @@
Hxzl = cos_phi*cos_theta*minus_dg_plus_g_over_radius*sin_theta
Hyzl = cos_theta*minus_dg_plus_g_over_radius*sin_phi*sin_theta
- ! get displacement and multiply by density to compute G tensor
- sx_l = rho * dummyx_loc(ijk,1,1)
- sy_l = rho * dummyy_loc(ijk,1,1)
- sz_l = rho * dummyz_loc(ijk,1,1)
+ ! distinguish between single and double precision for reals
+ if(CUSTOM_REAL == SIZE_REAL) then
- ! compute G tensor from s . g and add to sigma (not symmetric)
- sigma_xx = sigma_xx + sy_l*gyl + sz_l*gzl
- sigma_yy = sigma_yy + sx_l*gxl + sz_l*gzl
- sigma_zz = sigma_zz + sx_l*gxl + sy_l*gyl
+ ! get displacement and multiply by density to compute G tensor
+ sx_l = rho * dble(dummyx_loc(INDEX_IJK)) ! dble(displ_crust_mantle(1,iglob))
+ sy_l = rho * dble(dummyy_loc(INDEX_IJK)) ! dble(displ_crust_mantle(2,iglob))
+ sz_l = rho * dble(dummyz_loc(INDEX_IJK)) ! dble(displ_crust_mantle(3,iglob))
- sigma_xy = sigma_xy - sx_l * gyl
- sigma_yx = sigma_yx - sy_l * gxl
+ ! compute G tensor from s . g and add to sigma (not symmetric)
+ sigma_xx = sigma_xx + sngl(sy_l*gyl + sz_l*gzl)
+ sigma_yy = sigma_yy + sngl(sx_l*gxl + sz_l*gzl)
+ sigma_zz = sigma_zz + sngl(sx_l*gxl + sy_l*gyl)
- sigma_xz = sigma_xz - sx_l * gzl
- sigma_zx = sigma_zx - sz_l * gxl
+ sigma_xy = sigma_xy - sngl(sx_l * gyl)
+ sigma_yx = sigma_yx - sngl(sy_l * gxl)
- sigma_yz = sigma_yz - sy_l * gzl
- sigma_zy = sigma_zy - sz_l * gyl
+ sigma_xz = sigma_xz - sngl(sx_l * gzl)
+ sigma_zx = sigma_zx - sngl(sz_l * gxl)
- ! precompute vector
- factor = jacobianl * wgll_cube(ijk,1,1)
- rho_s_H(1,ijk,1,1) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
- rho_s_H(2,ijk,1,1) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
- rho_s_H(3,ijk,1,1) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
+ sigma_yz = sigma_yz - sngl(sy_l * gzl)
+ sigma_zy = sigma_zy - sngl(sz_l * gyl)
+
+ ! precompute vector
+ factor = dble(jacobianl) * wgll_cube(INDEX_IJK)
+ rho_s_H(1,INDEX_IJK) = sngl(factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl))
+ rho_s_H(2,INDEX_IJK) = sngl(factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl))
+ rho_s_H(3,INDEX_IJK) = sngl(factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl))
+
+ else
+
+ ! get displacement and multiply by density to compute G tensor
+ sx_l = rho * dummyx_loc(INDEX_IJK) ! displ_crust_mantle(1,iglob)
+ sy_l = rho * dummyy_loc(INDEX_IJK) ! displ_crust_mantle(2,iglob)
+ sz_l = rho * dummyz_loc(INDEX_IJK) ! displ_crust_mantle(3,iglob)
+
+ ! compute G tensor from s . g and add to sigma (not symmetric)
+ sigma_xx = sigma_xx + sy_l*gyl + sz_l*gzl
+ sigma_yy = sigma_yy + sx_l*gxl + sz_l*gzl
+ sigma_zz = sigma_zz + sx_l*gxl + sy_l*gyl
+
+ sigma_xy = sigma_xy - sx_l * gyl
+ sigma_yx = sigma_yx - sy_l * gxl
+
+ sigma_xz = sigma_xz - sx_l * gzl
+ sigma_zx = sigma_zx - sz_l * gxl
+
+ sigma_yz = sigma_yz - sy_l * gzl
+ sigma_zy = sigma_zy - sz_l * gyl
+
+ ! precompute vector
+ factor = jacobianl * wgll_cube(INDEX_IJK)
+ rho_s_H(1,INDEX_IJK) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
+ rho_s_H(2,INDEX_IJK) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
+ rho_s_H(3,INDEX_IJK) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
+
+ endif
+
endif ! end of section with gravity terms
! form dot product with test vector, non-symmetric form
- tempx1(ijk,1,1) = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl) ! this goes to accel_x
- tempy1(ijk,1,1) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl) ! this goes to accel_y
- tempz1(ijk,1,1) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl) ! this goes to accel_z
+ tempx1(INDEX_IJK) = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl) ! this goes to accel_x
+ tempy1(INDEX_IJK) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl) ! this goes to accel_y
+ tempz1(INDEX_IJK) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl) ! this goes to accel_z
- tempx2(ijk,1,1) = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl) ! this goes to accel_x
- tempy2(ijk,1,1) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl) ! this goes to accel_y
- tempz2(ijk,1,1) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl) ! this goes to accel_z
+ tempx2(INDEX_IJK) = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl) ! this goes to accel_x
+ tempy2(INDEX_IJK) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl) ! this goes to accel_y
+ tempz2(INDEX_IJK) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl) ! this goes to accel_z
- tempx3(ijk,1,1) = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl) ! this goes to accel_x
- tempy3(ijk,1,1) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl) ! this goes to accel_y
- tempz3(ijk,1,1) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl) ! this goes to accel_z
- enddo
+ tempx3(INDEX_IJK) = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl) ! this goes to accel_x
+ tempy3(INDEX_IJK) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl) ! this goes to accel_y
+ tempz3(INDEX_IJK) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl) ! this goes to accel_z
+ ENDDO_LOOP_IJK
+
end subroutine compute_element_aniso
-! end of FORCE_VECTORIZATION
-#endif
!--------------------------------------------------------------------------------------------
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_element_att_memory.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_element_att_memory.F90 2013-10-23 13:03:00 UTC (rev 22970)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_element_att_memory.F90 2013-10-23 14:27:26 UTC (rev 22971)
@@ -38,7 +38,7 @@
alphaval,betaval,gammaval, &
c44store,muvstore, &
epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz, &
- epsilondev_loc) !!!!!!!!!!!!!!!!! ,is_backward_field)
+ epsilondev_loc)
! crust mantle
! update memory variables based upon the Runge-Kutta scheme
@@ -83,8 +83,6 @@
real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
-!!!!!!!!!!!!!!! logical :: is_backward_field
-
! local parameters
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: factor_common_c44_muv
integer :: i_SLS
@@ -314,7 +312,7 @@
alphaval,betaval,gammaval, &
muvstore, &
epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz, &
- epsilondev_loc) !!!!!!!!!!!!! ,is_backward_field)
+ epsilondev_loc)
! inner core
! update memory variables based upon the Runge-Kutta scheme
@@ -356,8 +354,6 @@
real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
-!!!!!!!!!!! logical :: is_backward_field
-
! local parameters
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: factor_common_use
@@ -562,98 +558,99 @@
!
! helper functions
!
-
-
- subroutine compute_element_att_mem_up_cm(ispec,i,j,k, &
- R_xx_loc,R_yy_loc,R_xy_loc,R_xz_loc,R_yz_loc, &
- epsilondev_loc,c44_muv,is_backward_field)
-! crust mantle
-! update memory variables based upon the Runge-Kutta scheme
-
-
-!daniel: att - debug update
- use specfem_par,only: tau_sigma_dble,deltat,b_deltat
-
- use specfem_par_crustmantle,only: factor_common=>factor_common_crust_mantle
-
- use constants_solver
-
- implicit none
-
- ! element id
- integer :: ispec,i,j,k
-
- ! attenuation
- ! memory variables for attenuation
- ! memory variables R_ij are stored at the local rather than global level
- ! to allow for optimization of cache access by compiler
-! real(kind=CUSTOM_REAL), dimension(5,N_SLS) :: R_memory_loc
- real(kind=CUSTOM_REAL), dimension(N_SLS) :: R_xx_loc
- real(kind=CUSTOM_REAL), dimension(N_SLS) :: R_yy_loc
- real(kind=CUSTOM_REAL), dimension(N_SLS) :: R_xy_loc
- real(kind=CUSTOM_REAL), dimension(N_SLS) :: R_xz_loc
- real(kind=CUSTOM_REAL), dimension(N_SLS) :: R_yz_loc
-
- real(kind=CUSTOM_REAL), dimension(5) :: epsilondev_loc
- real(kind=CUSTOM_REAL) :: c44_muv
-
- logical :: is_backward_field
- double precision :: dt,kappa
-
-! local parameters
- real(kind=CUSTOM_REAL) :: factor_common_c44_muv
- integer :: i_SLS
-
- if( .not. is_backward_field ) then
- dt = dble(deltat)
- else
- ! backward/reconstruction: reverse time
- dt = dble(b_deltat)
- endif
-
- do i_SLS = 1,N_SLS
-
- ! runge-kutta scheme to update memory variables R(t)
- if( .false. ) then
-! classical RK 4: R'(t) = - 1/tau * R(t)
!
-! Butcher RK4:
-! 0 |
-! 1/2 | 1/2
-! 1/2 | 0 1/2
-! 1 | 0 1
-! -----------------------------------------------------------------------------
-! 1/6 1/3 1/3 1/6
- kappa = - dt/tau_sigma_dble(i_SLS)
+!daniel debug: att - debug update
+!
+! subroutine compute_element_att_mem_up_cm(ispec,i,j,k, &
+! R_xx_loc,R_yy_loc,R_xy_loc,R_xz_loc,R_yz_loc, &
+! epsilondev_loc,c44_muv,is_backward_field)
+!! crust mantle
+!! update memory variables based upon the Runge-Kutta scheme
+!
+!
+!!daniel: att - debug update
+! use specfem_par,only: tau_sigma_dble,deltat,b_deltat
+!
+! use specfem_par_crustmantle,only: factor_common=>factor_common_crust_mantle
+!
+! use constants_solver
+!
+! implicit none
+!
+! ! element id
+! integer :: ispec,i,j,k
+!
+! ! attenuation
+! ! memory variables for attenuation
+! ! memory variables R_ij are stored at the local rather than global level
+! ! to allow for optimization of cache access by compiler
+!! real(kind=CUSTOM_REAL), dimension(5,N_SLS) :: R_memory_loc
+! real(kind=CUSTOM_REAL), dimension(N_SLS) :: R_xx_loc
+! real(kind=CUSTOM_REAL), dimension(N_SLS) :: R_yy_loc
+! real(kind=CUSTOM_REAL), dimension(N_SLS) :: R_xy_loc
+! real(kind=CUSTOM_REAL), dimension(N_SLS) :: R_xz_loc
+! real(kind=CUSTOM_REAL), dimension(N_SLS) :: R_yz_loc
+!
+! real(kind=CUSTOM_REAL), dimension(5) :: epsilondev_loc
+! real(kind=CUSTOM_REAL) :: c44_muv
+!
+! logical :: is_backward_field
+! double precision :: dt,kappa
+!
+!! local parameters
+! real(kind=CUSTOM_REAL) :: factor_common_c44_muv
+! integer :: i_SLS
+!
+! if( .not. is_backward_field ) then
+! dt = dble(deltat)
+! else
+! ! backward/reconstruction: reverse time
+! dt = dble(b_deltat)
+! endif
+!
+! do i_SLS = 1,N_SLS
+!
+! ! runge-kutta scheme to update memory variables R(t)
+! if( .false. ) then
+!! classical RK 4: R'(t) = - 1/tau * R(t)
+!!
+!! Butcher RK4:
+!! 0 |
+!! 1/2 | 1/2
+!! 1/2 | 0 1/2
+!! 1 | 0 1
+!! -----------------------------------------------------------------------------
+!! 1/6 1/3 1/3 1/6
+! kappa = - dt/tau_sigma_dble(i_SLS)
+!
+! R_xx_loc(i_SLS) = R_xx_loc(i_SLS) * &
+! (1.0d0 + kappa*(1.d0 + 0.5d0*kappa*(1.d0 + 1.0d0/6.0d0*kappa*(1.d0 + 1.0d0/24.0d0*kappa))))
+! R_yy_loc(i_SLS) = R_yy_loc(i_SLS) * &
+! (1.0d0 + kappa*(1.d0 + 0.5d0*kappa*(1.d0 + 1.0d0/6.0d0*kappa*(1.d0 + 1.0d0/24.0d0*kappa))))
+! R_xy_loc(i_SLS) = R_xy_loc(i_SLS) * &
+! (1.0d0 + kappa*(1.d0 + 0.5d0*kappa*(1.d0 + 1.0d0/6.0d0*kappa*(1.d0 + 1.0d0/24.0d0*kappa))))
+! R_xz_loc(i_SLS) = R_xz_loc(i_SLS) * &
+! (1.0d0 + kappa*(1.d0 + 0.5d0*kappa*(1.d0 + 1.0d0/6.0d0*kappa*(1.d0 + 1.0d0/24.0d0*kappa))))
+! R_yz_loc(i_SLS) = R_yz_loc(i_SLS) * &
+! (1.0d0 + kappa*(1.d0 + 0.5d0*kappa*(1.d0 + 1.0d0/6.0d0*kappa*(1.d0 + 1.0d0/24.0d0*kappa))))
+! endif
+!
+! ! reformatted R_memory to handle large factor_common and reduced [alpha,beta,gamma]val
+! if( ATTENUATION_3D_VAL .or. ATTENUATION_1D_WITH_3D_STORAGE_VAL ) then
+! factor_common_c44_muv = factor_common(i_SLS,i,j,k,ispec) * c44_muv
+! else
+! factor_common_c44_muv = factor_common(i_SLS,1,1,1,ispec) * c44_muv
+! endif
+!
+! ! adds contributions from current strain
+! R_xx_loc(i_SLS) = R_xx_loc(i_SLS) + 0.5d0 * dt * dble(factor_common_c44_muv) * dble(epsilondev_loc(1))
+! R_yy_loc(i_SLS) = R_yy_loc(i_SLS) + 0.5d0 * dt * dble(factor_common_c44_muv) * dble(epsilondev_loc(2))
+! R_xy_loc(i_SLS) = R_xy_loc(i_SLS) + 0.5d0 * dt * dble(factor_common_c44_muv) * dble(epsilondev_loc(3))
+! R_xz_loc(i_SLS) = R_xz_loc(i_SLS) + 0.5d0 * dt * dble(factor_common_c44_muv) * dble(epsilondev_loc(4))
+! R_yz_loc(i_SLS) = R_yz_loc(i_SLS) + 0.5d0 * dt * dble(factor_common_c44_muv) * dble(epsilondev_loc(5))
+!
+! enddo ! i_SLS
+!
+! end subroutine compute_element_att_mem_up_cm
+!
- R_xx_loc(i_SLS) = R_xx_loc(i_SLS) * &
- (1.0d0 + kappa*(1.d0 + 0.5d0*kappa*(1.d0 + 1.0d0/6.0d0*kappa*(1.d0 + 1.0d0/24.0d0*kappa))))
- R_yy_loc(i_SLS) = R_yy_loc(i_SLS) * &
- (1.0d0 + kappa*(1.d0 + 0.5d0*kappa*(1.d0 + 1.0d0/6.0d0*kappa*(1.d0 + 1.0d0/24.0d0*kappa))))
- R_xy_loc(i_SLS) = R_xy_loc(i_SLS) * &
- (1.0d0 + kappa*(1.d0 + 0.5d0*kappa*(1.d0 + 1.0d0/6.0d0*kappa*(1.d0 + 1.0d0/24.0d0*kappa))))
- R_xz_loc(i_SLS) = R_xz_loc(i_SLS) * &
- (1.0d0 + kappa*(1.d0 + 0.5d0*kappa*(1.d0 + 1.0d0/6.0d0*kappa*(1.d0 + 1.0d0/24.0d0*kappa))))
- R_yz_loc(i_SLS) = R_yz_loc(i_SLS) * &
- (1.0d0 + kappa*(1.d0 + 0.5d0*kappa*(1.d0 + 1.0d0/6.0d0*kappa*(1.d0 + 1.0d0/24.0d0*kappa))))
- endif
-
- ! reformatted R_memory to handle large factor_common and reduced [alpha,beta,gamma]val
- if( ATTENUATION_3D_VAL .or. ATTENUATION_1D_WITH_3D_STORAGE_VAL ) then
- factor_common_c44_muv = factor_common(i_SLS,i,j,k,ispec) * c44_muv
- else
- factor_common_c44_muv = factor_common(i_SLS,1,1,1,ispec) * c44_muv
- endif
-
- ! adds contributions from current strain
- R_xx_loc(i_SLS) = R_xx_loc(i_SLS) + 0.5d0 * dt * dble(factor_common_c44_muv) * dble(epsilondev_loc(1))
- R_yy_loc(i_SLS) = R_yy_loc(i_SLS) + 0.5d0 * dt * dble(factor_common_c44_muv) * dble(epsilondev_loc(2))
- R_xy_loc(i_SLS) = R_xy_loc(i_SLS) + 0.5d0 * dt * dble(factor_common_c44_muv) * dble(epsilondev_loc(3))
- R_xz_loc(i_SLS) = R_xz_loc(i_SLS) + 0.5d0 * dt * dble(factor_common_c44_muv) * dble(epsilondev_loc(4))
- R_yz_loc(i_SLS) = R_yz_loc(i_SLS) + 0.5d0 * dt * dble(factor_common_c44_muv) * dble(epsilondev_loc(5))
-
- enddo ! i_SLS
-
- end subroutine compute_element_att_mem_up_cm
-
-
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_element_strain.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_element_strain.F90 2013-10-23 13:03:00 UTC (rev 22970)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_element_strain.F90 2013-10-23 14:27:26 UTC (rev 22971)
@@ -25,8 +25,12 @@
!
!=====================================================================
+! we switch between vectorized and non-vectorized version by using pre-processor flag FORCE_VECTORIZATION
+! and macros INDEX_IJK, DO_LOOP_IJK, ENDDO_LOOP_IJK defined in config.fh
+#include "config.fh"
+
subroutine compute_element_strain_undo_att_Dev(ispec,nglob,nspec, &
displ,ibool, &
hprime_xx,hprime_xxT,&
@@ -53,7 +57,9 @@
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ),intent(out) :: eps_trace_over_3_loc
! local variable
- integer :: i,j,k,iglob
+ integer :: iglob
+ integer :: i,j,k
+
real(kind=CUSTOM_REAL) :: templ
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
@@ -93,26 +99,16 @@
integer :: ijk
#endif
-#ifdef FORCE_VECTORIZATION
- do ijk=1,NGLLCUBE
- iglob = ibool(ijk,1,1,ispec)
- dummyx_loc(ijk,1,1) = displ(1,iglob)
- dummyy_loc(ijk,1,1) = displ(2,iglob)
- dummyz_loc(ijk,1,1) = displ(3,iglob)
- enddo
-#else
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- iglob = ibool(i,j,k,ispec)
- dummyx_loc(i,j,k) = displ(1,iglob)
- dummyy_loc(i,j,k) = displ(2,iglob)
- dummyz_loc(i,j,k) = displ(3,iglob)
- enddo
- enddo
- enddo
-#endif
+ DO_LOOP_IJK
+ iglob = ibool(INDEX_IJK,ispec)
+ dummyx_loc(INDEX_IJK) = displ(1,iglob)
+ dummyy_loc(INDEX_IJK) = displ(2,iglob)
+ dummyz_loc(INDEX_IJK) = displ(3,iglob)
+
+ ENDDO_LOOP_IJK
+
+ ! deville optimizations
do j=1,m2
do i=1,m1
C1_m1_m2_5points(i,j) = hprime_xx(i,1)*B1_m1_m2_5points(1,j) + &
@@ -182,35 +178,35 @@
enddo
enddo
-#ifdef FORCE_VECTORIZATION
- do ijk=1,NGLLCUBE
+ DO_LOOP_IJK
+
! get derivatives of ux, uy and uz with respect to x, y and z
- xixl = xix(ijk,1,1,ispec)
- xiyl = xiy(ijk,1,1,ispec)
- xizl = xiz(ijk,1,1,ispec)
- etaxl = etax(ijk,1,1,ispec)
- etayl = etay(ijk,1,1,ispec)
- etazl = etaz(ijk,1,1,ispec)
- gammaxl = gammax(ijk,1,1,ispec)
- gammayl = gammay(ijk,1,1,ispec)
- gammazl = gammaz(ijk,1,1,ispec)
+ xixl = xix(INDEX_IJK,ispec)
+ xiyl = xiy(INDEX_IJK,ispec)
+ xizl = xiz(INDEX_IJK,ispec)
+ etaxl = etax(INDEX_IJK,ispec)
+ etayl = etay(INDEX_IJK,ispec)
+ etazl = etaz(INDEX_IJK,ispec)
+ gammaxl = gammax(INDEX_IJK,ispec)
+ gammayl = gammay(INDEX_IJK,ispec)
+ gammazl = gammaz(INDEX_IJK,ispec)
! compute the jacobian
jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
- xiyl*(etaxl*gammazl-etazl*gammaxl) &
+ xizl*(etaxl*gammayl-etayl*gammaxl))
- duxdxl = xixl*tempx1(ijk,1,1) + etaxl*tempx2(ijk,1,1) + gammaxl*tempx3(ijk,1,1)
- duxdyl = xiyl*tempx1(ijk,1,1) + etayl*tempx2(ijk,1,1) + gammayl*tempx3(ijk,1,1)
- duxdzl = xizl*tempx1(ijk,1,1) + etazl*tempx2(ijk,1,1) + gammazl*tempx3(ijk,1,1)
+ duxdxl = xixl*tempx1(INDEX_IJK) + etaxl*tempx2(INDEX_IJK) + gammaxl*tempx3(INDEX_IJK)
+ duxdyl = xiyl*tempx1(INDEX_IJK) + etayl*tempx2(INDEX_IJK) + gammayl*tempx3(INDEX_IJK)
+ duxdzl = xizl*tempx1(INDEX_IJK) + etazl*tempx2(INDEX_IJK) + gammazl*tempx3(INDEX_IJK)
- duydxl = xixl*tempy1(ijk,1,1) + etaxl*tempy2(ijk,1,1) + gammaxl*tempy3(ijk,1,1)
- duydyl = xiyl*tempy1(ijk,1,1) + etayl*tempy2(ijk,1,1) + gammayl*tempy3(ijk,1,1)
- duydzl = xizl*tempy1(ijk,1,1) + etazl*tempy2(ijk,1,1) + gammazl*tempy3(ijk,1,1)
+ duydxl = xixl*tempy1(INDEX_IJK) + etaxl*tempy2(INDEX_IJK) + gammaxl*tempy3(INDEX_IJK)
+ duydyl = xiyl*tempy1(INDEX_IJK) + etayl*tempy2(INDEX_IJK) + gammayl*tempy3(INDEX_IJK)
+ duydzl = xizl*tempy1(INDEX_IJK) + etazl*tempy2(INDEX_IJK) + gammazl*tempy3(INDEX_IJK)
- duzdxl = xixl*tempz1(ijk,1,1) + etaxl*tempz2(ijk,1,1) + gammaxl*tempz3(ijk,1,1)
- duzdyl = xiyl*tempz1(ijk,1,1) + etayl*tempz2(ijk,1,1) + gammayl*tempz3(ijk,1,1)
- duzdzl = xizl*tempz1(ijk,1,1) + etazl*tempz2(ijk,1,1) + gammazl*tempz3(ijk,1,1)
+ duzdxl = xixl*tempz1(INDEX_IJK) + etaxl*tempz2(INDEX_IJK) + gammaxl*tempz3(INDEX_IJK)
+ duzdyl = xiyl*tempz1(INDEX_IJK) + etayl*tempz2(INDEX_IJK) + gammayl*tempz3(INDEX_IJK)
+ duzdzl = xizl*tempz1(INDEX_IJK) + etazl*tempz2(INDEX_IJK) + gammazl*tempz3(INDEX_IJK)
! precompute some sums to save CPU time
duxdxl_plus_duydyl = duxdxl + duydyl
@@ -222,66 +218,15 @@
! strains
templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
- eps_trace_over_3_loc(ijk,1,1) = templ
- epsilondev_loc(1,ijk,1,1) = duxdxl - templ
- epsilondev_loc(2,ijk,1,1) = duydyl - templ
- epsilondev_loc(3,ijk,1,1) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl
- epsilondev_loc(4,ijk,1,1) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl
- epsilondev_loc(5,ijk,1,1) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl
- enddo
-#else
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- ! get derivatives of ux, uy and uz with respect to x, y and z
- xixl = xix(i,j,k,ispec)
- xiyl = xiy(i,j,k,ispec)
- xizl = xiz(i,j,k,ispec)
- etaxl = etax(i,j,k,ispec)
- etayl = etay(i,j,k,ispec)
- etazl = etaz(i,j,k,ispec)
- gammaxl = gammax(i,j,k,ispec)
- gammayl = gammay(i,j,k,ispec)
- gammazl = gammaz(i,j,k,ispec)
+ eps_trace_over_3_loc(INDEX_IJK) = templ
+ epsilondev_loc(1,INDEX_IJK) = duxdxl - templ
+ epsilondev_loc(2,INDEX_IJK) = duydyl - templ
+ epsilondev_loc(3,INDEX_IJK) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl
+ epsilondev_loc(4,INDEX_IJK) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl
+ epsilondev_loc(5,INDEX_IJK) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl
- ! compute the jacobian
- jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
- - xiyl*(etaxl*gammazl-etazl*gammaxl) &
- + xizl*(etaxl*gammayl-etayl*gammaxl))
+ ENDDO_LOOP_IJK
- duxdxl = xixl*tempx1(i,j,k) + etaxl*tempx2(i,j,k) + gammaxl*tempx3(i,j,k)
- duxdyl = xiyl*tempx1(i,j,k) + etayl*tempx2(i,j,k) + gammayl*tempx3(i,j,k)
- duxdzl = xizl*tempx1(i,j,k) + etazl*tempx2(i,j,k) + gammazl*tempx3(i,j,k)
-
- duydxl = xixl*tempy1(i,j,k) + etaxl*tempy2(i,j,k) + gammaxl*tempy3(i,j,k)
- duydyl = xiyl*tempy1(i,j,k) + etayl*tempy2(i,j,k) + gammayl*tempy3(i,j,k)
- duydzl = xizl*tempy1(i,j,k) + etazl*tempy2(i,j,k) + gammazl*tempy3(i,j,k)
-
- duzdxl = xixl*tempz1(i,j,k) + etaxl*tempz2(i,j,k) + gammaxl*tempz3(i,j,k)
- duzdyl = xiyl*tempz1(i,j,k) + etayl*tempz2(i,j,k) + gammayl*tempz3(i,j,k)
- duzdzl = xizl*tempz1(i,j,k) + etazl*tempz2(i,j,k) + gammazl*tempz3(i,j,k)
-
- ! precompute some sums to save CPU time
- duxdxl_plus_duydyl = duxdxl + duydyl
- duxdxl_plus_duzdzl = duxdxl + duzdzl
- duydyl_plus_duzdzl = duydyl + duzdzl
- duxdyl_plus_duydxl = duxdyl + duydxl
- duzdxl_plus_duxdzl = duzdxl + duxdzl
- duzdyl_plus_duydzl = duzdyl + duydzl
-
- ! strains
- templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
- eps_trace_over_3_loc(i,j,k) = templ
- epsilondev_loc(1,i,j,k) = duxdxl - templ
- epsilondev_loc(2,i,j,k) = duydyl - templ
- epsilondev_loc(3,i,j,k) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl
- epsilondev_loc(4,i,j,k) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl
- epsilondev_loc(5,i,j,k) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl
- enddo
- enddo
- enddo
-#endif
-
end subroutine compute_element_strain_undo_att_Dev
@@ -470,7 +415,9 @@
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec_strain_only) :: eps_trace_over_3_loc_nplus1
! local variable
- integer :: i,j,k,iglob
+ integer :: iglob
+ integer :: i,j,k
+
real(kind=CUSTOM_REAL) :: templ
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
@@ -503,26 +450,16 @@
integer :: ijk
#endif
-#ifdef FORCE_VECTORIZATION
- do ijk=1,NGLLCUBE
- iglob = ibool(ijk,1,1,ispec)
- dummyx_loc(ijk,1,1) = displ(1,iglob) + deltat * veloc(1,iglob)
- dummyy_loc(ijk,1,1) = displ(2,iglob) + deltat * veloc(2,iglob)
- dummyz_loc(ijk,1,1) = displ(3,iglob) + deltat * veloc(3,iglob)
- enddo
-#else
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- iglob = ibool(i,j,k,ispec)
- dummyx_loc(i,j,k) = displ(1,iglob) + deltat * veloc(1,iglob)
- dummyy_loc(i,j,k) = displ(2,iglob) + deltat * veloc(2,iglob)
- dummyz_loc(i,j,k) = displ(3,iglob) + deltat * veloc(3,iglob)
- enddo
- enddo
- enddo
-#endif
+ DO_LOOP_IJK
+ iglob = ibool(INDEX_IJK,ispec)
+ dummyx_loc(INDEX_IJK) = displ(1,iglob) + deltat * veloc(1,iglob)
+ dummyy_loc(INDEX_IJK) = displ(2,iglob) + deltat * veloc(2,iglob)
+ dummyz_loc(INDEX_IJK) = displ(3,iglob) + deltat * veloc(3,iglob)
+
+ ENDDO_LOOP_IJK
+
+ ! deville optimizations
do j=1,m2
do i=1,m1
C1_m1_m2_5points(i,j) = hprime_xx(i,1)*B1_m1_m2_5points(1,j) + &
@@ -592,35 +529,35 @@
enddo
enddo
-#ifdef FORCE_VECTORIZATION
- do ijk=1,NGLLCUBE
+ DO_LOOP_IJK
+
! get derivatives of ux, uy and uz with respect to x, y and z
- xixl = xix(ijk,1,1,ispec)
- xiyl = xiy(ijk,1,1,ispec)
- xizl = xiz(ijk,1,1,ispec)
- etaxl = etax(ijk,1,1,ispec)
- etayl = etay(ijk,1,1,ispec)
- etazl = etaz(ijk,1,1,ispec)
- gammaxl = gammax(ijk,1,1,ispec)
- gammayl = gammay(ijk,1,1,ispec)
- gammazl = gammaz(ijk,1,1,ispec)
+ xixl = xix(INDEX_IJK,ispec)
+ xiyl = xiy(INDEX_IJK,ispec)
+ xizl = xiz(INDEX_IJK,ispec)
+ etaxl = etax(INDEX_IJK,ispec)
+ etayl = etay(INDEX_IJK,ispec)
+ etazl = etaz(INDEX_IJK,ispec)
+ gammaxl = gammax(INDEX_IJK,ispec)
+ gammayl = gammay(INDEX_IJK,ispec)
+ gammazl = gammaz(INDEX_IJK,ispec)
! compute the jacobian
jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
- xiyl*(etaxl*gammazl-etazl*gammaxl) &
+ xizl*(etaxl*gammayl-etayl*gammaxl))
- duxdxl = xixl*tempx1(ijk,1,1) + etaxl*tempx2(ijk,1,1) + gammaxl*tempx3(ijk,1,1)
- duxdyl = xiyl*tempx1(ijk,1,1) + etayl*tempx2(ijk,1,1) + gammayl*tempx3(ijk,1,1)
- duxdzl = xizl*tempx1(ijk,1,1) + etazl*tempx2(ijk,1,1) + gammazl*tempx3(ijk,1,1)
+ duxdxl = xixl*tempx1(INDEX_IJK) + etaxl*tempx2(INDEX_IJK) + gammaxl*tempx3(INDEX_IJK)
+ duxdyl = xiyl*tempx1(INDEX_IJK) + etayl*tempx2(INDEX_IJK) + gammayl*tempx3(INDEX_IJK)
+ duxdzl = xizl*tempx1(INDEX_IJK) + etazl*tempx2(INDEX_IJK) + gammazl*tempx3(INDEX_IJK)
- duydxl = xixl*tempy1(ijk,1,1) + etaxl*tempy2(ijk,1,1) + gammaxl*tempy3(ijk,1,1)
- duydyl = xiyl*tempy1(ijk,1,1) + etayl*tempy2(ijk,1,1) + gammayl*tempy3(ijk,1,1)
- duydzl = xizl*tempy1(ijk,1,1) + etazl*tempy2(ijk,1,1) + gammazl*tempy3(ijk,1,1)
+ duydxl = xixl*tempy1(INDEX_IJK) + etaxl*tempy2(INDEX_IJK) + gammaxl*tempy3(INDEX_IJK)
+ duydyl = xiyl*tempy1(INDEX_IJK) + etayl*tempy2(INDEX_IJK) + gammayl*tempy3(INDEX_IJK)
+ duydzl = xizl*tempy1(INDEX_IJK) + etazl*tempy2(INDEX_IJK) + gammazl*tempy3(INDEX_IJK)
- duzdxl = xixl*tempz1(ijk,1,1) + etaxl*tempz2(ijk,1,1) + gammaxl*tempz3(ijk,1,1)
- duzdyl = xiyl*tempz1(ijk,1,1) + etayl*tempz2(ijk,1,1) + gammayl*tempz3(ijk,1,1)
- duzdzl = xizl*tempz1(ijk,1,1) + etazl*tempz2(ijk,1,1) + gammazl*tempz3(ijk,1,1)
+ duzdxl = xixl*tempz1(INDEX_IJK) + etaxl*tempz2(INDEX_IJK) + gammaxl*tempz3(INDEX_IJK)
+ duzdyl = xiyl*tempz1(INDEX_IJK) + etayl*tempz2(INDEX_IJK) + gammayl*tempz3(INDEX_IJK)
+ duzdzl = xizl*tempz1(INDEX_IJK) + etazl*tempz2(INDEX_IJK) + gammazl*tempz3(INDEX_IJK)
! precompute some sums to save CPU time
duxdxl_plus_duydyl = duxdxl + duydyl
@@ -633,75 +570,19 @@
templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
if( nspec_strain_only == 1 ) then
if( ispec == 1 ) then
- eps_trace_over_3_loc_nplus1(ijk,1,1,1) = templ
+ eps_trace_over_3_loc_nplus1(INDEX_IJK,1) = templ
endif
else
- eps_trace_over_3_loc_nplus1(ijk,1,1,ispec) = templ
+ eps_trace_over_3_loc_nplus1(INDEX_IJK,ispec) = templ
endif
- epsilondev_xx_loc_nplus1(ijk,1,1,ispec) = duxdxl - templ
- epsilondev_yy_loc_nplus1(ijk,1,1,ispec) = duydyl - templ
- epsilondev_xy_loc_nplus1(ijk,1,1,ispec) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl
- epsilondev_xz_loc_nplus1(ijk,1,1,ispec) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl
- epsilondev_yz_loc_nplus1(ijk,1,1,ispec) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl
- enddo
-#else
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- ! get derivatives of ux, uy and uz with respect to x, y and z
- xixl = xix(i,j,k,ispec)
- xiyl = xiy(i,j,k,ispec)
- xizl = xiz(i,j,k,ispec)
- etaxl = etax(i,j,k,ispec)
- etayl = etay(i,j,k,ispec)
- etazl = etaz(i,j,k,ispec)
- gammaxl = gammax(i,j,k,ispec)
- gammayl = gammay(i,j,k,ispec)
- gammazl = gammaz(i,j,k,ispec)
+ epsilondev_xx_loc_nplus1(INDEX_IJK,ispec) = duxdxl - templ
+ epsilondev_yy_loc_nplus1(INDEX_IJK,ispec) = duydyl - templ
+ epsilondev_xy_loc_nplus1(INDEX_IJK,ispec) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl
+ epsilondev_xz_loc_nplus1(INDEX_IJK,ispec) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl
+ epsilondev_yz_loc_nplus1(INDEX_IJK,ispec) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl
- ! compute the jacobian
- jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
- - xiyl*(etaxl*gammazl-etazl*gammaxl) &
- + xizl*(etaxl*gammayl-etayl*gammaxl))
+ ENDDO_LOOP_IJK
- duxdxl = xixl*tempx1(i,j,k) + etaxl*tempx2(i,j,k) + gammaxl*tempx3(i,j,k)
- duxdyl = xiyl*tempx1(i,j,k) + etayl*tempx2(i,j,k) + gammayl*tempx3(i,j,k)
- duxdzl = xizl*tempx1(i,j,k) + etazl*tempx2(i,j,k) + gammazl*tempx3(i,j,k)
-
- duydxl = xixl*tempy1(i,j,k) + etaxl*tempy2(i,j,k) + gammaxl*tempy3(i,j,k)
- duydyl = xiyl*tempy1(i,j,k) + etayl*tempy2(i,j,k) + gammayl*tempy3(i,j,k)
- duydzl = xizl*tempy1(i,j,k) + etazl*tempy2(i,j,k) + gammazl*tempy3(i,j,k)
-
- duzdxl = xixl*tempz1(i,j,k) + etaxl*tempz2(i,j,k) + gammaxl*tempz3(i,j,k)
- duzdyl = xiyl*tempz1(i,j,k) + etayl*tempz2(i,j,k) + gammayl*tempz3(i,j,k)
- duzdzl = xizl*tempz1(i,j,k) + etazl*tempz2(i,j,k) + gammazl*tempz3(i,j,k)
-
- ! precompute some sums to save CPU time
- duxdxl_plus_duydyl = duxdxl + duydyl
- duxdxl_plus_duzdzl = duxdxl + duzdzl
- duydyl_plus_duzdzl = duydyl + duzdzl
- duxdyl_plus_duydxl = duxdyl + duydxl
- duzdxl_plus_duxdzl = duzdxl + duxdzl
- duzdyl_plus_duydzl = duzdyl + duydzl
-
- templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
- if( nspec_strain_only == 1 ) then
- if( ispec == 1 ) then
- eps_trace_over_3_loc_nplus1(i,j,k,1) = templ
- endif
- else
- eps_trace_over_3_loc_nplus1(i,j,k,ispec) = templ
- endif
- epsilondev_xx_loc_nplus1(i,j,k,ispec) = duxdxl - templ
- epsilondev_yy_loc_nplus1(i,j,k,ispec) = duydyl - templ
- epsilondev_xy_loc_nplus1(i,j,k,ispec) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl
- epsilondev_xz_loc_nplus1(i,j,k,ispec) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl
- epsilondev_yz_loc_nplus1(i,j,k,ispec) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl
- enddo
- enddo
- enddo
-#endif
-
end subroutine compute_element_strain_att_Dev
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90 2013-10-23 13:03:00 UTC (rev 22970)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90 2013-10-23 14:27:26 UTC (rev 22971)
@@ -25,10 +25,14 @@
!
!=====================================================================
+! we switch between vectorized and non-vectorized version by using pre-processor flag FORCE_VECTORIZATION
+! and macros INDEX_IJK, DO_LOOP_IJK, ENDDO_LOOP_IJK defined in config.fh
+#include "config.fh"
+
+
subroutine compute_forces_crust_mantle_Dev( NSPEC,NGLOB,NSPEC_ATT, &
deltat, &
displ_crust_mantle, &
-! veloc_crust_mantle, &
accel_crust_mantle, &
phase_is_inner, &
R_xx,R_yy,R_xy,R_xz,R_yz, &
@@ -37,7 +41,7 @@
epsilondev_xz,epsilondev_yz, &
epsilon_trace_over_3, &
alphaval,betaval,gammaval, &
- factor_common,vnspec,is_backward_field)
+ factor_common,vnspec )
! this routine is optimized for NGLLX = NGLLY = NGLLZ = 5 using the Deville et al. (2002) inlined matrix-matrix products
@@ -71,11 +75,7 @@
nspec_outer => nspec_outer_crust_mantle, &
nspec_inner => nspec_inner_crust_mantle
-#ifdef FORCE_VECTORIZATION
use specfem_par,only: wgllwgll_xy_3D,wgllwgll_xz_3D,wgllwgll_yz_3D
-#else
- use specfem_par,only: wgllwgll_xy,wgllwgll_xz,wgllwgll_yz
-#endif
!daniel: att - debug
! use specfem_par,only: it,NSTEP
@@ -113,8 +113,6 @@
! inner/outer element run flag
logical :: phase_is_inner
- logical :: is_backward_field
-
! local parameters
! Deville
@@ -162,8 +160,8 @@
! for gravity
real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: rho_s_H
- integer :: ispec,i,j,k,iglob
-
+ integer :: ispec,iglob
+ integer :: i,j,k
integer :: num_elements,ispec_p
integer :: iphase
@@ -190,30 +188,15 @@
! only compute element which belong to current phase (inner or outer elements)
- ! subroutines adapted from Deville, Fischer and Mund, High-order methods
- ! for incompressible fluid flow, Cambridge University Press (2002),
- ! pages 386 and 389 and Figure 8.3.1
+ DO_LOOP_IJK
-#ifdef FORCE_VECTORIZATION
- do ijk=1,NGLLCUBE
- iglob = ibool(ijk,1,1,ispec)
- dummyx_loc(ijk,1,1) = displ_crust_mantle(1,iglob)
- dummyy_loc(ijk,1,1) = displ_crust_mantle(2,iglob)
- dummyz_loc(ijk,1,1) = displ_crust_mantle(3,iglob)
- enddo
-#else
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- iglob = ibool(i,j,k,ispec)
- dummyx_loc(i,j,k) = displ_crust_mantle(1,iglob)
- dummyy_loc(i,j,k) = displ_crust_mantle(2,iglob)
- dummyz_loc(i,j,k) = displ_crust_mantle(3,iglob)
- enddo
- enddo
- enddo
-#endif
+ iglob = ibool(INDEX_IJK,ispec)
+ dummyx_loc(INDEX_IJK) = displ_crust_mantle(1,iglob)
+ dummyy_loc(INDEX_IJK) = displ_crust_mantle(2,iglob)
+ dummyz_loc(INDEX_IJK) = displ_crust_mantle(3,iglob)
+ ENDDO_LOOP_IJK
+
! subroutines adapted from Deville, Fischer and Mund, High-order methods
! for incompressible fluid flow, Cambridge University Press (2002),
! pages 386 and 389 and Figure 8.3.1
@@ -292,54 +275,51 @@
if(ANISOTROPIC_3D_MANTLE_VAL) then
! anisotropic element
call compute_element_aniso(ispec, &
- minus_gravity_table,density_table,minus_deriv_gravity_table, &
- xstore,ystore,zstore, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- wgll_cube, &
- c11store,c12store,c13store,c14store,c15store,c16store,c22store, &
- c23store,c24store,c25store,c26store,c33store,c34store,c35store, &
- c36store,c44store,c45store,c46store,c55store,c56store,c66store, &
- ibool, &
- R_xx,R_yy,R_xy,R_xz,R_yz, &
- epsilon_trace_over_3, &
- one_minus_sum_beta,vnspec, &
- tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
- dummyx_loc,dummyy_loc,dummyz_loc, &
- epsilondev_loc, &
- rho_s_H,is_backward_field)
+ minus_gravity_table,density_table,minus_deriv_gravity_table, &
+ xstore,ystore,zstore, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ wgll_cube, &
+ c11store,c12store,c13store,c14store,c15store,c16store,c22store, &
+ c23store,c24store,c25store,c26store,c33store,c34store,c35store, &
+ c36store,c44store,c45store,c46store,c55store,c56store,c66store, &
+ ibool, &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ epsilon_trace_over_3, &
+ one_minus_sum_beta,vnspec, &
+ tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
+ dummyx_loc,dummyy_loc,dummyz_loc, &
+ epsilondev_loc,rho_s_H)
else
if(.not. ispec_is_tiso(ispec)) then
! isotropic element
call compute_element_iso(ispec, &
- minus_gravity_table,density_table,minus_deriv_gravity_table, &
- xstore,ystore,zstore, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- wgll_cube, &
- kappavstore,muvstore, &
- ibool, &
- R_xx,R_yy,R_xy,R_xz,R_yz, &
- epsilon_trace_over_3, &
- one_minus_sum_beta,vnspec, &
- tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
- dummyx_loc,dummyy_loc,dummyz_loc, &
- epsilondev_loc, &
- rho_s_H,is_backward_field)
+ minus_gravity_table,density_table,minus_deriv_gravity_table, &
+ xstore,ystore,zstore, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ wgll_cube, &
+ kappavstore,muvstore, &
+ ibool, &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ epsilon_trace_over_3, &
+ one_minus_sum_beta,vnspec, &
+ tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
+ dummyx_loc,dummyy_loc,dummyz_loc, &
+ epsilondev_loc,rho_s_H)
else
! transverse isotropic element
call compute_element_tiso(ispec, &
- minus_gravity_table,density_table,minus_deriv_gravity_table, &
- xstore,ystore,zstore, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- wgll_cube, &
- kappavstore,kappahstore,muvstore,muhstore,eta_anisostore, &
- ibool, &
- R_xx,R_yy,R_xy,R_xz,R_yz, &
- epsilon_trace_over_3, &
- one_minus_sum_beta,vnspec, &
- tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
- dummyx_loc,dummyy_loc,dummyz_loc, &
- epsilondev_loc, &
- rho_s_H,is_backward_field)
+ minus_gravity_table,density_table,minus_deriv_gravity_table, &
+ xstore,ystore,zstore, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ wgll_cube, &
+ kappavstore,kappahstore,muvstore,muhstore,eta_anisostore, &
+ ibool, &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ epsilon_trace_over_3, &
+ one_minus_sum_beta,vnspec, &
+ tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
+ dummyx_loc,dummyy_loc,dummyz_loc, &
+ epsilondev_loc,rho_s_H)
endif ! .not. ispec_is_tiso
endif
@@ -416,40 +396,42 @@
enddo
! sum contributions
-#ifdef FORCE_VECTORIZATION
- do ijk=1,NGLLCUBE
- fac1 = wgllwgll_yz_3D(ijk,1,1)
- fac2 = wgllwgll_xz_3D(ijk,1,1)
- fac3 = wgllwgll_xy_3D(ijk,1,1)
- sum_terms(1,ijk,1,1) = - (fac1*newtempx1(ijk,1,1) + fac2*newtempx2(ijk,1,1) + fac3*newtempx3(ijk,1,1))
- sum_terms(2,ijk,1,1) = - (fac1*newtempy1(ijk,1,1) + fac2*newtempy2(ijk,1,1) + fac3*newtempy3(ijk,1,1))
- sum_terms(3,ijk,1,1) = - (fac1*newtempz1(ijk,1,1) + fac2*newtempz2(ijk,1,1) + fac3*newtempz3(ijk,1,1))
- enddo
+
+ DO_LOOP_IJK
+
+ fac1 = wgllwgll_yz_3D(INDEX_IJK)
+ fac2 = wgllwgll_xz_3D(INDEX_IJK)
+ fac3 = wgllwgll_xy_3D(INDEX_IJK)
+ sum_terms(1,INDEX_IJK) = - (fac1*newtempx1(INDEX_IJK) + fac2*newtempx2(INDEX_IJK) + fac3*newtempx3(INDEX_IJK))
+ sum_terms(2,INDEX_IJK) = - (fac1*newtempy1(INDEX_IJK) + fac2*newtempy2(INDEX_IJK) + fac3*newtempy3(INDEX_IJK))
+ sum_terms(3,INDEX_IJK) = - (fac1*newtempz1(INDEX_IJK) + fac2*newtempz2(INDEX_IJK) + fac3*newtempz3(INDEX_IJK))
+
+ ENDDO_LOOP_IJK
+
! adds gravity terms
if(GRAVITY_VAL) then
+
+#ifdef FORCE_VECTORIZATION
do ijk = 1,NDIM*NGLLCUBE
sum_terms(ijk,1,1,1) = sum_terms(ijk,1,1,1) + rho_s_H(ijk,1,1,1)
enddo
- endif
#else
- do k=1,NGLLZ
- do j=1,NGLLY
- fac1 = wgllwgll_yz(j,k)
- do i=1,NGLLX
- fac2 = wgllwgll_xz(i,k)
- fac3 = wgllwgll_xy(i,j)
- ! sums contributions
- sum_terms(1,i,j,k) = - (fac1*newtempx1(i,j,k) + fac2*newtempx2(i,j,k) + fac3*newtempx3(i,j,k))
- sum_terms(2,i,j,k) = - (fac1*newtempy1(i,j,k) + fac2*newtempy2(i,j,k) + fac3*newtempy3(i,j,k))
- sum_terms(3,i,j,k) = - (fac1*newtempz1(i,j,k) + fac2*newtempz2(i,j,k) + fac3*newtempz3(i,j,k))
- ! adds gravity terms
- if(GRAVITY_VAL) sum_terms(:,i,j,k) = sum_terms(:,i,j,k) + rho_s_H(:,i,j,k)
- enddo ! NGLLX
- enddo ! NGLLY
- enddo ! NGLLZ
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+ sum_terms(1,INDEX_IJK) = sum_terms(1,INDEX_IJK) + rho_s_H(1,INDEX_IJK)
+ sum_terms(2,INDEX_IJK) = sum_terms(2,INDEX_IJK) + rho_s_H(2,INDEX_IJK)
+ sum_terms(3,INDEX_IJK) = sum_terms(3,INDEX_IJK) + rho_s_H(3,INDEX_IJK)
+ enddo
+ enddo
+ enddo
#endif
- ! sum contributions from each element to the global mesh and add gravity terms
+ endif
+
+
+ ! updates acceleration
+
#ifdef FORCE_VECTORIZATION
! we can force vectorization using a compiler directive here because we know that there is no dependency
! inside a given spectral element, since all the global points of a local elements are different by definition
@@ -459,21 +441,23 @@
!pgi$ ivdep
!DIR$ IVDEP
do ijk = 1,NGLLCUBE
- iglob = ibool(ijk,1,1,ispec)
- ! do NOT use array syntax ":" for the three statements below otherwise most compilers
- ! will not be able to vectorize the outer loop
- accel_crust_mantle(1,iglob) = accel_crust_mantle(1,iglob) + sum_terms(1,ijk,1,1)
- accel_crust_mantle(2,iglob) = accel_crust_mantle(2,iglob) + sum_terms(2,ijk,1,1)
- accel_crust_mantle(3,iglob) = accel_crust_mantle(3,iglob) + sum_terms(3,ijk,1,1)
- enddo
#else
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
- iglob = ibool(i,j,k,ispec)
- accel_crust_mantle(1,iglob) = accel_crust_mantle(1,iglob) + sum_terms(1,i,j,k)
- accel_crust_mantle(2,iglob) = accel_crust_mantle(2,iglob) + sum_terms(2,i,j,k)
- accel_crust_mantle(3,iglob) = accel_crust_mantle(3,iglob) + sum_terms(3,i,j,k)
+#endif
+
+ iglob = ibool(INDEX_IJK,ispec)
+
+ ! do NOT use array syntax ":" for the three statements below otherwise most compilers
+ ! will not be able to vectorize the outer loop
+ accel_crust_mantle(1,iglob) = accel_crust_mantle(1,iglob) + sum_terms(1,INDEX_IJK)
+ accel_crust_mantle(2,iglob) = accel_crust_mantle(2,iglob) + sum_terms(2,INDEX_IJK)
+ accel_crust_mantle(3,iglob) = accel_crust_mantle(3,iglob) + sum_terms(3,INDEX_IJK)
+
+#ifdef FORCE_VECTORIZATION
+ enddo
+#else
enddo
enddo
enddo
@@ -511,7 +495,7 @@
c44store,muvstore, &
epsilondev_xx,epsilondev_yy,epsilondev_xy, &
epsilondev_xz,epsilondev_yz, &
- epsilondev_loc) !!!!!!!!!!!!!!! ,is_backward_field)
+ epsilondev_loc)
endif
endif
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_noDev.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_noDev.f90 2013-10-23 13:03:00 UTC (rev 22970)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_noDev.f90 2013-10-23 14:27:26 UTC (rev 22971)
@@ -28,7 +28,6 @@
subroutine compute_forces_crust_mantle(NSPEC,NGLOB,NSPEC_ATT, &
deltat, &
displ_crust_mantle, &
-! veloc_crust_mantle, &
accel_crust_mantle, &
phase_is_inner, &
R_xx,R_yy,R_xy,R_xz,R_yz, &
@@ -37,7 +36,7 @@
epsilondev_xz,epsilondev_yz, &
epsilon_trace_over_3, &
alphaval,betaval,gammaval, &
- factor_common,vnspec) !!!!!!!!!!!!!!!!!!!!! ,is_backward_field)
+ factor_common,vnspec)
use constants_solver
@@ -78,7 +77,6 @@
! displacement, velocity and acceleration
real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: displ_crust_mantle
-! real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: veloc_crust_mantle
real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: accel_crust_mantle
! variable sized array variables
@@ -102,8 +100,6 @@
! inner/outer element run flag
logical :: phase_is_inner
-!!!!!!!!!!!!!!! logical :: is_backward_field
-
! local parameters
! for attenuation
@@ -829,7 +825,7 @@
c44store,muvstore, &
epsilondev_xx,epsilondev_yy,epsilondev_xy, &
epsilondev_xz,epsilondev_yz, &
- epsilondev_loc) !!!!!!!!!!!!!!! ,is_backward_field)
+ epsilondev_loc)
endif
endif
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90 2013-10-23 13:03:00 UTC (rev 22970)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90 2013-10-23 14:27:26 UTC (rev 22971)
@@ -25,10 +25,15 @@
!
!=====================================================================
+! we switch between vectorized and non-vectorized version by using pre-processor flag FORCE_VECTORIZATION
+! and macros INDEX_IJK, DO_LOOP_IJK, ENDDO_LOOP_IJK defined in config.fh
+#include "config.fh"
+
+
+
subroutine compute_forces_inner_core_Dev( NSPEC,NGLOB,NSPEC_ATT, &
deltat, &
displ_inner_core, &
-! veloc_inner_core, &
accel_inner_core, &
phase_is_inner, &
R_xx,R_yy,R_xy,R_xz,R_yz, &
@@ -36,8 +41,8 @@
epsilondev_xx,epsilondev_yy,epsilondev_xy, &
epsilondev_xz,epsilondev_yz, &
epsilon_trace_over_3,&
- alphaval,betaval,gammaval,factor_common, &
- vnspec) !!!!!!!!!!!!!!!!! ,is_backward_field)
+ alphaval,betaval,gammaval, &
+ factor_common,vnspec)
! this routine is optimized for NGLLX = NGLLY = NGLLZ = 5 using the Deville et al. (2002) inlined matrix-matrix products
@@ -64,11 +69,7 @@
nspec_outer => nspec_outer_inner_core, &
nspec_inner => nspec_inner_inner_core
-#ifdef FORCE_VECTORIZATION
use specfem_par,only: wgllwgll_xy_3D,wgllwgll_xz_3D,wgllwgll_yz_3D
-#else
- use specfem_par,only: wgllwgll_xy,wgllwgll_xz,wgllwgll_yz
-#endif
implicit none
@@ -79,7 +80,6 @@
! displacement and acceleration
real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: displ_inner_core
-! real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: veloc_inner_core
real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: accel_inner_core
! for attenuation
@@ -103,8 +103,6 @@
! inner/outer element run flag
logical :: phase_is_inner
-!!!!!!!!!!!!!!! logical :: is_backward_field
-
! local parameters
! Deville
@@ -113,6 +111,7 @@
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc,dummyy_loc,dummyz_loc, &
newtempx1,newtempx2,newtempx3,newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3
+
real(kind=CUSTOM_REAL), dimension(NGLLX,m2) :: B1_m1_m2_5points,B2_m1_m2_5points,B3_m1_m2_5points
real(kind=CUSTOM_REAL), dimension(m1,m2) :: C1_m1_m2_5points,C2_m1_m2_5points,C3_m1_m2_5points
real(kind=CUSTOM_REAL), dimension(m1,m2) :: E1_m1_m2_5points,E2_m1_m2_5points,E3_m1_m2_5points
@@ -127,7 +126,6 @@
equivalence(newtempy1,E2_m1_m2_5points)
equivalence(newtempz1,E3_m1_m2_5points)
-
real(kind=CUSTOM_REAL), dimension(m2,NGLLX) :: &
A1_mxm_m2_m1_5points,A2_mxm_m2_m1_5points,A3_mxm_m2_m1_5points
real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
@@ -157,9 +155,10 @@
real(kind=CUSTOM_REAL) sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz
- real(kind=CUSTOM_REAL) fac1,fac2,fac3,templ
+ real(kind=CUSTOM_REAL) fac1,fac2,fac3
real(kind=CUSTOM_REAL) lambdal,mul,lambdalplus2mul
real(kind=CUSTOM_REAL) kappal
+ real(kind=CUSTOM_REAL) templ
real(kind=CUSTOM_REAL) minus_sum_beta
real(kind=CUSTOM_REAL) c11l,c33l,c12l,c13l,c44l
@@ -175,15 +174,17 @@
real(kind=CUSTOM_REAL) sigma_yx,sigma_zx,sigma_zy
integer :: int_radius
- integer :: ispec
+ integer :: ispec,iglob
integer :: i,j,k
- integer :: iglob
-! integer :: computed_elements
integer :: num_elements,ispec_p
integer :: iphase
#ifdef FORCE_VECTORIZATION
+! in this vectorized version we have to assume that N_SLS == 3 in order to be able to unroll and thus suppress
+! an inner loop that would otherwise prevent vectorization; this is safe in practice in all cases because N_SLS == 3
+! in all known applications, and in the main program we check that N_SLS == 3 if FORCE_VECTORIZATION is used and we stop
integer :: ijk
+ real(kind=CUSTOM_REAL) :: R_xx_val,R_yy_val
#endif
! ****************************************************
@@ -209,21 +210,18 @@
! exclude fictitious elements in central cube
if(idoubling(ispec) /= IFLAG_IN_FICTITIOUS_CUBE) then
+ DO_LOOP_IJK
+
+ iglob = ibool(INDEX_IJK,ispec)
+ dummyx_loc(INDEX_IJK) = displ_inner_core(1,iglob)
+ dummyy_loc(INDEX_IJK) = displ_inner_core(2,iglob)
+ dummyz_loc(INDEX_IJK) = displ_inner_core(3,iglob)
+
+ ENDDO_LOOP_IJK
+
! subroutines adapted from Deville, Fischer and Mund, High-order methods
! for incompressible fluid flow, Cambridge University Press (2002),
! pages 386 and 389 and Figure 8.3.1
-
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- iglob = ibool(i,j,k,ispec)
- dummyx_loc(i,j,k) = displ_inner_core(1,iglob)
- dummyy_loc(i,j,k) = displ_inner_core(2,iglob)
- dummyz_loc(i,j,k) = displ_inner_core(3,iglob)
- enddo
- enddo
- enddo
-
do j=1,m2
do i=1,m1
C1_m1_m2_5points(i,j) = hprime_xx(i,1)*B1_m1_m2_5points(1,j) + &
@@ -293,281 +291,311 @@
enddo
enddo
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
+ DO_LOOP_IJK
- ! get derivatives of ux, uy and uz with respect to x, y and z
- xixl = xix(i,j,k,ispec)
- xiyl = xiy(i,j,k,ispec)
- xizl = xiz(i,j,k,ispec)
- etaxl = etax(i,j,k,ispec)
- etayl = etay(i,j,k,ispec)
- etazl = etaz(i,j,k,ispec)
- gammaxl = gammax(i,j,k,ispec)
- gammayl = gammay(i,j,k,ispec)
- gammazl = gammaz(i,j,k,ispec)
+ ! get derivatives of ux, uy and uz with respect to x, y and z
+ xixl = xix(INDEX_IJK,ispec)
+ xiyl = xiy(INDEX_IJK,ispec)
+ xizl = xiz(INDEX_IJK,ispec)
+ etaxl = etax(INDEX_IJK,ispec)
+ etayl = etay(INDEX_IJK,ispec)
+ etazl = etaz(INDEX_IJK,ispec)
+ gammaxl = gammax(INDEX_IJK,ispec)
+ gammayl = gammay(INDEX_IJK,ispec)
+ gammazl = gammaz(INDEX_IJK,ispec)
- ! compute the jacobian
- jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
- - xiyl*(etaxl*gammazl-etazl*gammaxl) &
- + xizl*(etaxl*gammayl-etayl*gammaxl))
+ ! compute the jacobian
+ jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
+ - xiyl*(etaxl*gammazl-etazl*gammaxl) &
+ + xizl*(etaxl*gammayl-etayl*gammaxl))
- duxdxl = xixl*tempx1(i,j,k) + etaxl*tempx2(i,j,k) + gammaxl*tempx3(i,j,k)
- duxdyl = xiyl*tempx1(i,j,k) + etayl*tempx2(i,j,k) + gammayl*tempx3(i,j,k)
- duxdzl = xizl*tempx1(i,j,k) + etazl*tempx2(i,j,k) + gammazl*tempx3(i,j,k)
+ duxdxl = xixl*tempx1(INDEX_IJK) + etaxl*tempx2(INDEX_IJK) + gammaxl*tempx3(INDEX_IJK)
+ duxdyl = xiyl*tempx1(INDEX_IJK) + etayl*tempx2(INDEX_IJK) + gammayl*tempx3(INDEX_IJK)
+ duxdzl = xizl*tempx1(INDEX_IJK) + etazl*tempx2(INDEX_IJK) + gammazl*tempx3(INDEX_IJK)
- duydxl = xixl*tempy1(i,j,k) + etaxl*tempy2(i,j,k) + gammaxl*tempy3(i,j,k)
- duydyl = xiyl*tempy1(i,j,k) + etayl*tempy2(i,j,k) + gammayl*tempy3(i,j,k)
- duydzl = xizl*tempy1(i,j,k) + etazl*tempy2(i,j,k) + gammazl*tempy3(i,j,k)
+ duydxl = xixl*tempy1(INDEX_IJK) + etaxl*tempy2(INDEX_IJK) + gammaxl*tempy3(INDEX_IJK)
+ duydyl = xiyl*tempy1(INDEX_IJK) + etayl*tempy2(INDEX_IJK) + gammayl*tempy3(INDEX_IJK)
+ duydzl = xizl*tempy1(INDEX_IJK) + etazl*tempy2(INDEX_IJK) + gammazl*tempy3(INDEX_IJK)
- duzdxl = xixl*tempz1(i,j,k) + etaxl*tempz2(i,j,k) + gammaxl*tempz3(i,j,k)
- duzdyl = xiyl*tempz1(i,j,k) + etayl*tempz2(i,j,k) + gammayl*tempz3(i,j,k)
- duzdzl = xizl*tempz1(i,j,k) + etazl*tempz2(i,j,k) + gammazl*tempz3(i,j,k)
+ duzdxl = xixl*tempz1(INDEX_IJK) + etaxl*tempz2(INDEX_IJK) + gammaxl*tempz3(INDEX_IJK)
+ duzdyl = xiyl*tempz1(INDEX_IJK) + etayl*tempz2(INDEX_IJK) + gammayl*tempz3(INDEX_IJK)
+ duzdzl = xizl*tempz1(INDEX_IJK) + etazl*tempz2(INDEX_IJK) + gammazl*tempz3(INDEX_IJK)
- ! precompute some sums to save CPU time
- duxdxl_plus_duydyl = duxdxl + duydyl
- duxdxl_plus_duzdzl = duxdxl + duzdzl
- duydyl_plus_duzdzl = duydyl + duzdzl
- duxdyl_plus_duydxl = duxdyl + duydxl
- duzdxl_plus_duxdzl = duzdxl + duxdzl
- duzdyl_plus_duydzl = duzdyl + duydzl
+ ! precompute some sums to save CPU time
+ duxdxl_plus_duydyl = duxdxl + duydyl
+ duxdxl_plus_duzdzl = duxdxl + duzdzl
+ duydyl_plus_duzdzl = duydyl + duzdzl
+ duxdyl_plus_duydxl = duxdyl + duydxl
+ duzdxl_plus_duxdzl = duzdxl + duxdzl
+ duzdyl_plus_duydzl = duzdyl + duydzl
- ! compute deviatoric strain
- if (COMPUTE_AND_STORE_STRAIN) then
- if(NSPEC_INNER_CORE_STRAIN_ONLY == 1) then
- if( ispec == 1) then
- epsilon_trace_over_3(i,j,k,1) = templ
- endif
- else
- epsilon_trace_over_3(i,j,k,ispec) = templ
- endif
- templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
- epsilondev_loc(1,i,j,k) = duxdxl - templ
- epsilondev_loc(2,i,j,k) = duydyl - templ
- epsilondev_loc(3,i,j,k) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl
- epsilondev_loc(4,i,j,k) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl
- epsilondev_loc(5,i,j,k) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl
+ ! compute deviatoric strain
+ if (COMPUTE_AND_STORE_STRAIN) then
+ templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
+ if(NSPEC_INNER_CORE_STRAIN_ONLY == 1) then
+ if( ispec == 1) then
+ epsilon_trace_over_3(INDEX_IJK,1) = templ
endif
+ else
+ epsilon_trace_over_3(INDEX_IJK,ispec) = templ
+ endif
+ epsilondev_loc(1,INDEX_IJK) = duxdxl - templ
+ epsilondev_loc(2,INDEX_IJK) = duydyl - templ
+ epsilondev_loc(3,INDEX_IJK) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl
+ epsilondev_loc(4,INDEX_IJK) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl
+ epsilondev_loc(5,INDEX_IJK) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl
+ endif
- if(ANISOTROPIC_INNER_CORE_VAL) then
- ! elastic tensor for hexagonal symmetry in reduced notation:
- !
- ! c11 c12 c13 0 0 0
- ! c12 c11 c13 0 0 0
- ! c13 c13 c33 0 0 0
- ! 0 0 0 c44 0 0
- ! 0 0 0 0 c44 0
- ! 0 0 0 0 0 (c11-c12)/2
- !
- ! in terms of the A, C, L, N and F of Love (1927):
- !
- ! c11 = A
- ! c12 = A-2N
- ! c13 = F
- ! c33 = C
- ! c44 = L
- c11l = c11store(i,j,k,ispec)
- c12l = c12store(i,j,k,ispec)
- c13l = c13store(i,j,k,ispec)
- c33l = c33store(i,j,k,ispec)
- c44l = c44store(i,j,k,ispec)
+ if(ANISOTROPIC_INNER_CORE_VAL) then
+ ! elastic tensor for hexagonal symmetry in reduced notation:
+ !
+ ! c11 c12 c13 0 0 0
+ ! c12 c11 c13 0 0 0
+ ! c13 c13 c33 0 0 0
+ ! 0 0 0 c44 0 0
+ ! 0 0 0 0 c44 0
+ ! 0 0 0 0 0 (c11-c12)/2
+ !
+ ! in terms of the A, C, L, N and F of Love (1927):
+ !
+ ! c11 = A
+ ! c12 = A-2N
+ ! c13 = F
+ ! c33 = C
+ ! c44 = L
+ c11l = c11store(INDEX_IJK,ispec)
+ c12l = c12store(INDEX_IJK,ispec)
+ c13l = c13store(INDEX_IJK,ispec)
+ c33l = c33store(INDEX_IJK,ispec)
+ c44l = c44store(INDEX_IJK,ispec)
- ! use unrelaxed parameters if attenuation
- if(ATTENUATION_VAL) then
- if( ATTENUATION_3D_VAL .or. ATTENUATION_1D_WITH_3D_STORAGE_VAL ) then
- minus_sum_beta = one_minus_sum_beta(i,j,k,ispec) - 1.0_CUSTOM_REAL
- else
- minus_sum_beta = one_minus_sum_beta(1,1,1,ispec) - 1.0_CUSTOM_REAL
- endif
- mul = muvstore(i,j,k,ispec)
- c11l = c11l + FOUR_THIRDS * minus_sum_beta * mul
- c12l = c12l - TWO_THIRDS * minus_sum_beta * mul
- c13l = c13l - TWO_THIRDS * minus_sum_beta * mul
- c33l = c33l + FOUR_THIRDS * minus_sum_beta * mul
- c44l = c44l + minus_sum_beta * mul
- endif
+ ! use unrelaxed parameters if attenuation
+ if(ATTENUATION_VAL) then
+ if( ATTENUATION_3D_VAL .or. ATTENUATION_1D_WITH_3D_STORAGE_VAL ) then
+ minus_sum_beta = one_minus_sum_beta(INDEX_IJK,ispec) - 1.0_CUSTOM_REAL
+ else
+ minus_sum_beta = one_minus_sum_beta(1,1,1,ispec) - 1.0_CUSTOM_REAL
+ endif
+ mul = muvstore(INDEX_IJK,ispec)
+ c11l = c11l + FOUR_THIRDS * minus_sum_beta * mul
+ c12l = c12l - TWO_THIRDS * minus_sum_beta * mul
+ c13l = c13l - TWO_THIRDS * minus_sum_beta * mul
+ c33l = c33l + FOUR_THIRDS * minus_sum_beta * mul
+ c44l = c44l + minus_sum_beta * mul
+ endif
- sigma_xx = c11l*duxdxl + c12l*duydyl + c13l*duzdzl
- sigma_yy = c12l*duxdxl + c11l*duydyl + c13l*duzdzl
- sigma_zz = c13l*duxdxl + c13l*duydyl + c33l*duzdzl
- sigma_xy = 0.5_CUSTOM_REAL*(c11l-c12l)*duxdyl_plus_duydxl
- sigma_xz = c44l*duzdxl_plus_duxdzl
- sigma_yz = c44l*duzdyl_plus_duydzl
+ sigma_xx = c11l*duxdxl + c12l*duydyl + c13l*duzdzl
+ sigma_yy = c12l*duxdxl + c11l*duydyl + c13l*duzdzl
+ sigma_zz = c13l*duxdxl + c13l*duydyl + c33l*duzdzl
+ sigma_xy = 0.5_CUSTOM_REAL*(c11l-c12l)*duxdyl_plus_duydxl
+ sigma_xz = c44l*duzdxl_plus_duxdzl
+ sigma_yz = c44l*duzdyl_plus_duydzl
+ else
+
+ ! inner core with no anisotropy, use kappav and muv for instance
+ ! layer with no anisotropy, use kappav and muv for instance
+ kappal = kappavstore(INDEX_IJK,ispec)
+ mul = muvstore(INDEX_IJK,ispec)
+
+ ! use unrelaxed parameters if attenuation
+ if(ATTENUATION_VAL ) then
+ if( ATTENUATION_3D_VAL .or. ATTENUATION_1D_WITH_3D_STORAGE_VAL ) then
+ mul = mul * one_minus_sum_beta(INDEX_IJK,ispec)
else
+ mul = mul * one_minus_sum_beta(1,1,1,ispec)
+ endif
+ endif
- ! inner core with no anisotropy, use kappav and muv for instance
- ! layer with no anisotropy, use kappav and muv for instance
- kappal = kappavstore(i,j,k,ispec)
- mul = muvstore(i,j,k,ispec)
+ lambdalplus2mul = kappal + FOUR_THIRDS * mul
+ lambdal = lambdalplus2mul - 2._CUSTOM_REAL*mul
- ! use unrelaxed parameters if attenuation
- if(ATTENUATION_VAL ) then
- if( ATTENUATION_3D_VAL .or. ATTENUATION_1D_WITH_3D_STORAGE_VAL ) then
- mul = mul * one_minus_sum_beta(i,j,k,ispec)
- else
- mul = mul * one_minus_sum_beta(1,1,1,ispec)
- endif
- endif
+ ! compute stress sigma
+ sigma_xx = lambdalplus2mul*duxdxl + lambdal*duydyl_plus_duzdzl
+ sigma_yy = lambdalplus2mul*duydyl + lambdal*duxdxl_plus_duzdzl
+ sigma_zz = lambdalplus2mul*duzdzl + lambdal*duxdxl_plus_duydyl
- lambdalplus2mul = kappal + FOUR_THIRDS * mul
- lambdal = lambdalplus2mul - 2._CUSTOM_REAL*mul
+ sigma_xy = mul*duxdyl_plus_duydxl
+ sigma_xz = mul*duzdxl_plus_duxdzl
+ sigma_yz = mul*duzdyl_plus_duydzl
- ! compute stress sigma
- sigma_xx = lambdalplus2mul*duxdxl + lambdal*duydyl_plus_duzdzl
- sigma_yy = lambdalplus2mul*duydyl + lambdal*duxdxl_plus_duzdzl
- sigma_zz = lambdalplus2mul*duzdzl + lambdal*duxdxl_plus_duydyl
+ endif
- sigma_xy = mul*duxdyl_plus_duydxl
- sigma_xz = mul*duzdxl_plus_duxdzl
- sigma_yz = mul*duzdyl_plus_duydzl
+ ! subtract memory variables if attenuation
+ if( ATTENUATION_VAL .and. .not. PARTIAL_PHYS_DISPERSION_ONLY_VAL ) then
+#ifdef FORCE_VECTORIZATION
+ ! do NOT put this in a subroutine, otherwise the call to the subroutine prevents compilers
+ ! from vectorizing the outer loop
- endif
+ ! here we assume that N_SLS == 3 in order to be able to unroll and suppress the loop
+ ! in order to vectorize the outer loop
+ R_xx_val = R_xx(1,INDEX_IJK,ispec)
+ R_yy_val = R_yy(1,INDEX_IJK,ispec)
+ sigma_xx = sigma_xx - R_xx_val
+ sigma_yy = sigma_yy - R_yy_val
+ sigma_zz = sigma_zz + R_xx_val + R_yy_val
+ sigma_xy = sigma_xy - R_xy(1,INDEX_IJK,ispec)
+ sigma_xz = sigma_xz - R_xz(1,INDEX_IJK,ispec)
+ sigma_yz = sigma_yz - R_yz(1,INDEX_IJK,ispec)
- ! subtract memory variables if attenuation
- if( ATTENUATION_VAL .and. .not. PARTIAL_PHYS_DISPERSION_ONLY_VAL ) then
- ! note: Fortran passes pointers to array location, thus R_memory(1,1,...) is fine
- call compute_element_att_stress(R_xx(1,i,j,k,ispec), &
- R_yy(1,i,j,k,ispec), &
- R_xy(1,i,j,k,ispec), &
- R_xz(1,i,j,k,ispec), &
- R_yz(1,i,j,k,ispec), &
- sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz)
- endif
+ R_xx_val = R_xx(2,INDEX_IJK,ispec)
+ R_yy_val = R_yy(2,INDEX_IJK,ispec)
+ sigma_xx = sigma_xx - R_xx_val
+ sigma_yy = sigma_yy - R_yy_val
+ sigma_zz = sigma_zz + R_xx_val + R_yy_val
+ sigma_xy = sigma_xy - R_xy(2,INDEX_IJK,ispec)
+ sigma_xz = sigma_xz - R_xz(2,INDEX_IJK,ispec)
+ sigma_yz = sigma_yz - R_yz(2,INDEX_IJK,ispec)
- ! define symmetric components of sigma for gravity
- sigma_yx = sigma_xy
- sigma_zx = sigma_xz
- sigma_zy = sigma_yz
+ R_xx_val = R_xx(3,INDEX_IJK,ispec)
+ R_yy_val = R_yy(3,INDEX_IJK,ispec)
+ sigma_xx = sigma_xx - R_xx_val
+ sigma_yy = sigma_yy - R_yy_val
+ sigma_zz = sigma_zz + R_xx_val + R_yy_val
+ sigma_xy = sigma_xy - R_xy(3,INDEX_IJK,ispec)
+ sigma_xz = sigma_xz - R_xz(3,INDEX_IJK,ispec)
+ sigma_yz = sigma_yz - R_yz(3,INDEX_IJK,ispec)
+#else
+ ! note: Fortran passes pointers to array location, thus R_memory(1,1,...) is fine
+ call compute_element_att_stress(R_xx(1,INDEX_IJK,ispec), &
+ R_yy(1,INDEX_IJK,ispec), &
+ R_xy(1,INDEX_IJK,ispec), &
+ R_xz(1,INDEX_IJK,ispec), &
+ R_yz(1,INDEX_IJK,ispec), &
+ sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz)
+#endif
+ endif
- ! compute non-symmetric terms for gravity
- if(GRAVITY_VAL) then
+ ! define symmetric components of sigma for gravity
+ sigma_yx = sigma_xy
+ sigma_zx = sigma_xz
+ sigma_zy = sigma_yz
- ! use mesh coordinates to get theta and phi
- ! x y and z contain r theta and phi
- iglob = ibool(i,j,k,ispec)
- radius = dble(xstore(iglob))
- theta = dble(ystore(iglob))
- phi = dble(zstore(iglob))
+ ! compute non-symmetric terms for gravity
+ if(GRAVITY_VAL) then
- ! make sure radius is never zero even for points at center of cube
- ! because we later divide by radius
- if(radius < 100.d0 / R_EARTH) radius = 100.d0 / R_EARTH
+ ! use mesh coordinates to get theta and phi
+ ! x y and z contain r theta and phi
+ iglob = ibool(INDEX_IJK,ispec)
+ radius = dble(xstore(iglob))
+ theta = dble(ystore(iglob))
+ phi = dble(zstore(iglob))
- cos_theta = dcos(theta)
- sin_theta = dsin(theta)
- cos_phi = dcos(phi)
- sin_phi = dsin(phi)
+ ! make sure radius is never zero even for points at center of cube
+ ! because we later divide by radius
+ if(radius < 100.d0 / R_EARTH) radius = 100.d0 / R_EARTH
- cos_theta_sq = cos_theta**2
- sin_theta_sq = sin_theta**2
- cos_phi_sq = cos_phi**2
- sin_phi_sq = sin_phi**2
+ cos_theta = dcos(theta)
+ sin_theta = dsin(theta)
+ cos_phi = dcos(phi)
+ sin_phi = dsin(phi)
- ! get g, rho and dg/dr=dg
- ! spherical components of the gravitational acceleration
- ! for efficiency replace with lookup table every 100 m in radial direction
- ! make sure we never use zero for point exactly at the center of the Earth
- int_radius = max(1,nint(radius * R_EARTH_KM * 10.d0))
- minus_g = minus_gravity_table(int_radius)
- minus_dg = minus_deriv_gravity_table(int_radius)
- rho = density_table(int_radius)
+ cos_theta_sq = cos_theta**2
+ sin_theta_sq = sin_theta**2
+ cos_phi_sq = cos_phi**2
+ sin_phi_sq = sin_phi**2
- ! Cartesian components of the gravitational acceleration
- gxl = minus_g*sin_theta*cos_phi
- gyl = minus_g*sin_theta*sin_phi
- gzl = minus_g*cos_theta
+ ! get g, rho and dg/dr=dg
+ ! spherical components of the gravitational acceleration
+ ! for efficiency replace with lookup table every 100 m in radial direction
+ ! make sure we never use zero for point exactly at the center of the Earth
+ int_radius = max(1,nint(radius * R_EARTH_KM * 10.d0))
+ minus_g = minus_gravity_table(int_radius)
+ minus_dg = minus_deriv_gravity_table(int_radius)
+ rho = density_table(int_radius)
- ! Cartesian components of gradient of gravitational acceleration
- ! obtained from spherical components
- minus_g_over_radius = minus_g / radius
- minus_dg_plus_g_over_radius = minus_dg - minus_g_over_radius
+ ! Cartesian components of the gravitational acceleration
+ gxl = minus_g*sin_theta*cos_phi
+ gyl = minus_g*sin_theta*sin_phi
+ gzl = minus_g*cos_theta
- Hxxl = minus_g_over_radius*(cos_phi_sq*cos_theta_sq + sin_phi_sq) + cos_phi_sq*minus_dg*sin_theta_sq
- Hyyl = minus_g_over_radius*(cos_phi_sq + cos_theta_sq*sin_phi_sq) + minus_dg*sin_phi_sq*sin_theta_sq
- Hzzl = cos_theta_sq*minus_dg + minus_g_over_radius*sin_theta_sq
- Hxyl = cos_phi*minus_dg_plus_g_over_radius*sin_phi*sin_theta_sq
- Hxzl = cos_phi*cos_theta*minus_dg_plus_g_over_radius*sin_theta
- Hyzl = cos_theta*minus_dg_plus_g_over_radius*sin_phi*sin_theta
+ ! Cartesian components of gradient of gravitational acceleration
+ ! obtained from spherical components
+ minus_g_over_radius = minus_g / radius
+ minus_dg_plus_g_over_radius = minus_dg - minus_g_over_radius
- ! for locality principle, we set iglob again, in order to have it in the cache again
- iglob = ibool(i,j,k,ispec)
+ Hxxl = minus_g_over_radius*(cos_phi_sq*cos_theta_sq + sin_phi_sq) + cos_phi_sq*minus_dg*sin_theta_sq
+ Hyyl = minus_g_over_radius*(cos_phi_sq + cos_theta_sq*sin_phi_sq) + minus_dg*sin_phi_sq*sin_theta_sq
+ Hzzl = cos_theta_sq*minus_dg + minus_g_over_radius*sin_theta_sq
+ Hxyl = cos_phi*minus_dg_plus_g_over_radius*sin_phi*sin_theta_sq
+ Hxzl = cos_phi*cos_theta*minus_dg_plus_g_over_radius*sin_theta
+ Hyzl = cos_theta*minus_dg_plus_g_over_radius*sin_phi*sin_theta
- ! distinguish between single and double precision for reals
- if(CUSTOM_REAL == SIZE_REAL) then
- ! get displacement and multiply by density to compute G tensor
- sx_l = rho * dble(displ_inner_core(1,iglob))
- sy_l = rho * dble(displ_inner_core(2,iglob))
- sz_l = rho * dble(displ_inner_core(3,iglob))
+ ! for locality principle, we set iglob again, in order to have it in the cache again
+ iglob = ibool(INDEX_IJK,ispec)
- ! compute G tensor from s . g and add to sigma (not symmetric)
- sigma_xx = sigma_xx + sngl(sy_l*gyl + sz_l*gzl)
- sigma_yy = sigma_yy + sngl(sx_l*gxl + sz_l*gzl)
- sigma_zz = sigma_zz + sngl(sx_l*gxl + sy_l*gyl)
+ ! distinguish between single and double precision for reals
+ if(CUSTOM_REAL == SIZE_REAL) then
+ ! get displacement and multiply by density to compute G tensor
+ sx_l = rho * dble(displ_inner_core(1,iglob))
+ sy_l = rho * dble(displ_inner_core(2,iglob))
+ sz_l = rho * dble(displ_inner_core(3,iglob))
- sigma_xy = sigma_xy - sngl(sx_l * gyl)
- sigma_yx = sigma_yx - sngl(sy_l * gxl)
+ ! compute G tensor from s . g and add to sigma (not symmetric)
+ sigma_xx = sigma_xx + sngl(sy_l*gyl + sz_l*gzl)
+ sigma_yy = sigma_yy + sngl(sx_l*gxl + sz_l*gzl)
+ sigma_zz = sigma_zz + sngl(sx_l*gxl + sy_l*gyl)
- sigma_xz = sigma_xz - sngl(sx_l * gzl)
- sigma_zx = sigma_zx - sngl(sz_l * gxl)
+ sigma_xy = sigma_xy - sngl(sx_l * gyl)
+ sigma_yx = sigma_yx - sngl(sy_l * gxl)
- sigma_yz = sigma_yz - sngl(sy_l * gzl)
- sigma_zy = sigma_zy - sngl(sz_l * gyl)
+ sigma_xz = sigma_xz - sngl(sx_l * gzl)
+ sigma_zx = sigma_zx - sngl(sz_l * gxl)
- ! precompute vector
- factor = dble(jacobianl) * wgll_cube(i,j,k)
- rho_s_H(1,i,j,k) = sngl(factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl))
- rho_s_H(2,i,j,k) = sngl(factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl))
- rho_s_H(3,i,j,k) = sngl(factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl))
+ sigma_yz = sigma_yz - sngl(sy_l * gzl)
+ sigma_zy = sigma_zy - sngl(sz_l * gyl)
- else
+ ! precompute vector
+ factor = dble(jacobianl) * wgll_cube(INDEX_IJK)
+ rho_s_H(1,INDEX_IJK) = sngl(factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl))
+ rho_s_H(2,INDEX_IJK) = sngl(factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl))
+ rho_s_H(3,INDEX_IJK) = sngl(factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl))
- ! get displacement and multiply by density to compute G tensor
- sx_l = rho * displ_inner_core(1,iglob)
- sy_l = rho * displ_inner_core(2,iglob)
- sz_l = rho * displ_inner_core(3,iglob)
+ else
- ! compute G tensor from s . g and add to sigma (not symmetric)
- sigma_xx = sigma_xx + sy_l*gyl + sz_l*gzl
- sigma_yy = sigma_yy + sx_l*gxl + sz_l*gzl
- sigma_zz = sigma_zz + sx_l*gxl + sy_l*gyl
+ ! get displacement and multiply by density to compute G tensor
+ sx_l = rho * displ_inner_core(1,iglob)
+ sy_l = rho * displ_inner_core(2,iglob)
+ sz_l = rho * displ_inner_core(3,iglob)
- sigma_xy = sigma_xy - sx_l * gyl
- sigma_yx = sigma_yx - sy_l * gxl
+ ! compute G tensor from s . g and add to sigma (not symmetric)
+ sigma_xx = sigma_xx + sy_l*gyl + sz_l*gzl
+ sigma_yy = sigma_yy + sx_l*gxl + sz_l*gzl
+ sigma_zz = sigma_zz + sx_l*gxl + sy_l*gyl
- sigma_xz = sigma_xz - sx_l * gzl
- sigma_zx = sigma_zx - sz_l * gxl
+ sigma_xy = sigma_xy - sx_l * gyl
+ sigma_yx = sigma_yx - sy_l * gxl
- sigma_yz = sigma_yz - sy_l * gzl
- sigma_zy = sigma_zy - sz_l * gyl
+ sigma_xz = sigma_xz - sx_l * gzl
+ sigma_zx = sigma_zx - sz_l * gxl
- ! precompute vector
- factor = jacobianl * wgll_cube(i,j,k)
- rho_s_H(1,i,j,k) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
- rho_s_H(2,i,j,k) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
- rho_s_H(3,i,j,k) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
+ sigma_yz = sigma_yz - sy_l * gzl
+ sigma_zy = sigma_zy - sz_l * gyl
- endif
+ ! precompute vector
+ factor = jacobianl * wgll_cube(INDEX_IJK)
+ rho_s_H(1,INDEX_IJK) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
+ rho_s_H(2,INDEX_IJK) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
+ rho_s_H(3,INDEX_IJK) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
- endif ! end of section with gravity terms
+ endif
- ! form dot product with test vector, non-symmetric form
- tempx1(i,j,k) = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl) ! this goes to accel_x
- tempy1(i,j,k) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl) ! this goes to accel_y
- tempz1(i,j,k) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl) ! this goes to accel_z
+ endif ! end of section with gravity terms
- tempx2(i,j,k) = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl) ! this goes to accel_x
- tempy2(i,j,k) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl) ! this goes to accel_y
- tempz2(i,j,k) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl) ! this goes to accel_z
+ ! form dot product with test vector, non-symmetric form
+ tempx1(INDEX_IJK) = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl) ! this goes to accel_x
+ tempy1(INDEX_IJK) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl) ! this goes to accel_y
+ tempz1(INDEX_IJK) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl) ! this goes to accel_z
- tempx3(i,j,k) = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl) ! this goes to accel_x
- tempy3(i,j,k) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl) ! this goes to accel_y
- tempz3(i,j,k) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl) ! this goes to accel_z
+ tempx2(INDEX_IJK) = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl) ! this goes to accel_x
+ tempy2(INDEX_IJK) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl) ! this goes to accel_y
+ tempz2(INDEX_IJK) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl) ! this goes to accel_z
- enddo
- enddo
- enddo
+ tempx3(INDEX_IJK) = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl) ! this goes to accel_x
+ tempy3(INDEX_IJK) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl) ! this goes to accel_y
+ tempz3(INDEX_IJK) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl) ! this goes to accel_z
+ ENDDO_LOOP_IJK
+
! subroutines adapted from Deville, Fischer and Mund, High-order methods
! for incompressible fluid flow, Cambridge University Press (2002),
! pages 386 and 389 and Figure 8.3.1
@@ -640,37 +668,41 @@
enddo
enddo
+ ! sums contributions
+
+ DO_LOOP_IJK
+
+ fac1 = wgllwgll_yz_3D(INDEX_IJK)
+ fac2 = wgllwgll_xz_3D(INDEX_IJK)
+ fac3 = wgllwgll_xy_3D(INDEX_IJK)
+ sum_terms(1,INDEX_IJK) = - (fac1*newtempx1(INDEX_IJK) + fac2*newtempx2(INDEX_IJK) + fac3*newtempx3(INDEX_IJK))
+ sum_terms(2,INDEX_IJK) = - (fac1*newtempy1(INDEX_IJK) + fac2*newtempy2(INDEX_IJK) + fac3*newtempy3(INDEX_IJK))
+ sum_terms(3,INDEX_IJK) = - (fac1*newtempz1(INDEX_IJK) + fac2*newtempz2(INDEX_IJK) + fac3*newtempz3(INDEX_IJK))
+
+ ENDDO_LOOP_IJK
+
+ ! adds gravity
+ if(GRAVITY_VAL) then
+
#ifdef FORCE_VECTORIZATION
- do ijk=1,NGLLCUBE
- fac1 = wgllwgll_yz_3D(ijk,1,1)
- fac2 = wgllwgll_xz_3D(ijk,1,1)
- fac3 = wgllwgll_xy_3D(ijk,1,1)
- sum_terms(1,ijk,1,1) = - (fac1*newtempx1(ijk,1,1) + fac2*newtempx2(ijk,1,1) + fac3*newtempx3(ijk,1,1))
- sum_terms(2,ijk,1,1) = - (fac1*newtempy1(ijk,1,1) + fac2*newtempy2(ijk,1,1) + fac3*newtempy3(ijk,1,1))
- sum_terms(3,ijk,1,1) = - (fac1*newtempz1(ijk,1,1) + fac2*newtempz2(ijk,1,1) + fac3*newtempz3(ijk,1,1))
- enddo
- if(GRAVITY_VAL) then
do ijk = 1,NDIM*NGLLCUBE
sum_terms(ijk,1,1,1) = sum_terms(ijk,1,1,1) + rho_s_H(ijk,1,1,1)
enddo
- endif
#else
- do k=1,NGLLZ
- do j=1,NGLLY
- fac1 = wgllwgll_yz(j,k)
- do i=1,NGLLX
- fac2 = wgllwgll_xz(i,k)
- fac3 = wgllwgll_xy(i,j)
- ! sum contributions
- sum_terms(1,i,j,k) = - (fac1*newtempx1(i,j,k) + fac2*newtempx2(i,j,k) + fac3*newtempx3(i,j,k))
- sum_terms(2,i,j,k) = - (fac1*newtempy1(i,j,k) + fac2*newtempy2(i,j,k) + fac3*newtempy3(i,j,k))
- sum_terms(3,i,j,k) = - (fac1*newtempz1(i,j,k) + fac2*newtempz2(i,j,k) + fac3*newtempz3(i,j,k))
- if(GRAVITY_VAL) sum_terms(:,i,j,k) = sum_terms(:,i,j,k) + rho_s_H(:,i,j,k)
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+ sum_terms(1,INDEX_IJK) = sum_terms(1,INDEX_IJK) + rho_s_H(1,INDEX_IJK)
+ sum_terms(2,INDEX_IJK) = sum_terms(2,INDEX_IJK) + rho_s_H(2,INDEX_IJK)
+ sum_terms(3,INDEX_IJK) = sum_terms(3,INDEX_IJK) + rho_s_H(3,INDEX_IJK)
+ enddo
enddo
enddo
- enddo
#endif
+ endif
+
+
! sum contributions from each element to the global mesh and add gravity terms
#ifdef FORCE_VECTORIZATION
! we can force vectorization using a compiler directive here because we know that there is no dependency
@@ -681,23 +713,22 @@
!pgi$ ivdep
!DIR$ IVDEP
do ijk = 1,NGLLCUBE
- iglob = ibool(ijk,1,1,ispec)
- ! do NOT use array syntax ":" for the three statements below
- ! otherwise most compilers will not be able to vectorize the outer loop
- accel_inner_core(1,iglob) = accel_inner_core(1,iglob) + sum_terms(1,ijk,1,1)
- accel_inner_core(2,iglob) = accel_inner_core(2,iglob) + sum_terms(2,ijk,1,1)
- accel_inner_core(3,iglob) = accel_inner_core(3,iglob) + sum_terms(3,ijk,1,1)
- enddo
#else
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
- iglob = ibool(i,j,k,ispec)
+#endif
+
+ iglob = ibool(INDEX_IJK,ispec)
! do NOT use array syntax ":" for the three statements below
! otherwise most compilers will not be able to vectorize the outer loop
- accel_inner_core(1,iglob) = accel_inner_core(1,iglob) + sum_terms(1,i,j,k)
- accel_inner_core(2,iglob) = accel_inner_core(2,iglob) + sum_terms(2,i,j,k)
- accel_inner_core(3,iglob) = accel_inner_core(3,iglob) + sum_terms(3,i,j,k)
+ accel_inner_core(1,iglob) = accel_inner_core(1,iglob) + sum_terms(1,INDEX_IJK)
+ accel_inner_core(2,iglob) = accel_inner_core(2,iglob) + sum_terms(2,INDEX_IJK)
+ accel_inner_core(3,iglob) = accel_inner_core(3,iglob) + sum_terms(3,INDEX_IJK)
+
+#ifdef FORCE_VECTORIZATION
+ enddo
+#else
enddo
enddo
enddo
@@ -735,7 +766,7 @@
muvstore, &
epsilondev_xx,epsilondev_yy,epsilondev_xy, &
epsilondev_xz,epsilondev_yz, &
- epsilondev_loc) !!!!!!!!!!!! ,is_backward_field)
+ epsilondev_loc)
endif
endif
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_noDev.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_noDev.f90 2013-10-23 13:03:00 UTC (rev 22970)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_noDev.f90 2013-10-23 14:27:26 UTC (rev 22971)
@@ -28,7 +28,6 @@
subroutine compute_forces_inner_core( NSPEC,NGLOB,NSPEC_ATT, &
deltat, &
displ_inner_core, &
-! veloc_inner_core, &
accel_inner_core, &
phase_is_inner, &
R_xx,R_yy,R_xy,R_xz,R_yz, &
@@ -37,7 +36,7 @@
epsilondev_xz,epsilondev_yz, &
epsilon_trace_over_3,&
alphaval,betaval,gammaval,factor_common, &
- vnspec) !!!!!!!!!!!!!!!!!!! ,is_backward_field)
+ vnspec)
use constants_solver
@@ -71,7 +70,6 @@
! displacement, velocity and acceleration
real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: displ_inner_core
-! real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: veloc_inner_core
real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: accel_inner_core
! for attenuation
@@ -95,8 +93,6 @@
! inner/outer element run flag
logical :: phase_is_inner
-!!!!!!!!!!!!!!!! logical :: is_backward_field
-
! local parameters
real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
@@ -576,7 +572,7 @@
muvstore, &
epsilondev_xx,epsilondev_yy,epsilondev_xy, &
epsilondev_xz,epsilondev_yz, &
- epsilondev_loc) !!!!!!!!!!!! ,is_backward_field)
+ epsilondev_loc)
endif
endif
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core_Dev.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core_Dev.F90 2013-10-23 13:03:00 UTC (rev 22970)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core_Dev.F90 2013-10-23 14:27:26 UTC (rev 22971)
@@ -25,6 +25,12 @@
!
!=====================================================================
+! we switch between vectorized and non-vectorized version by using pre-processor flag FORCE_VECTORIZATION
+! and macros INDEX_IJK, DO_LOOP_IJK, ENDDO_LOOP_IJK defined in config.fh
+#include "config.fh"
+
+
+
subroutine compute_forces_outer_core_Dev(time,deltat,two_omega_earth, &
NSPEC,NGLOB, &
A_array_rotation,B_array_rotation, &
@@ -53,11 +59,7 @@
nspec_outer => nspec_outer_outer_core, &
nspec_inner => nspec_inner_outer_core
-#ifdef FORCE_VECTORIZATION
use specfem_par,only: wgllwgll_xy_3D,wgllwgll_xz_3D,wgllwgll_yz_3D
-#else
- use specfem_par,only: wgllwgll_xy,wgllwgll_xz,wgllwgll_yz
-#endif
implicit none
@@ -81,9 +83,10 @@
logical :: phase_is_inner
! local parameters
-
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: tempx1,tempx2,tempx3
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: newtempx1,newtempx2,newtempx3
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: sum_terms
+
! for gravity
integer :: int_radius
double precision :: radius,theta,phi,gxl,gyl,gzl
@@ -128,9 +131,6 @@
#ifdef FORCE_VECTORIZATION
integer :: ijk
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: sum_terms
-#else
- real(kind=CUSTOM_REAL) :: sum_terms
#endif
! ****************************************************
@@ -155,13 +155,14 @@
ispec = phase_ispec_inner(ispec_p,iphase)
! only compute element which belong to current phase (inner or outer elements)
-#ifdef FORCE_VECTORIZATION
- do ijk=1,NGLLCUBE
- iglob = ibool(ijk,1,1,ispec)
- ! get a local copy of the potential field
- dummyx_loc(ijk,1,1) = displfluid(iglob)
+ DO_LOOP_IJK
+ iglob = ibool(INDEX_IJK,ispec)
+
+ ! stores "displacement"
+ dummyx_loc(INDEX_IJK) = displfluid(iglob)
+
! pre-computes factors
! use mesh coordinates to get theta and phi
! x y z contain r theta phi
@@ -178,64 +179,22 @@
if( .not. GRAVITY_VAL ) then
! grad(rho)/rho in Cartesian components
- displ_times_grad_x_ln_rho(ijk,1,1) = dummyx_loc(ijk,1,1) &
- * sin_theta * cos_phi * d_ln_density_dr_table(int_radius)
- displ_times_grad_y_ln_rho(ijk,1,1) = dummyx_loc(ijk,1,1) &
- * sin_theta * sin_phi * d_ln_density_dr_table(int_radius)
- displ_times_grad_z_ln_rho(ijk,1,1) = dummyx_loc(ijk,1,1) &
- * cos_theta * d_ln_density_dr_table(int_radius)
+ displ_times_grad_x_ln_rho(INDEX_IJK) = dummyx_loc(INDEX_IJK) &
+ * sngl(sin_theta * cos_phi * d_ln_density_dr_table(int_radius))
+ displ_times_grad_y_ln_rho(INDEX_IJK) = dummyx_loc(INDEX_IJK) &
+ * sngl(sin_theta * sin_phi * d_ln_density_dr_table(int_radius))
+ displ_times_grad_z_ln_rho(INDEX_IJK) = dummyx_loc(INDEX_IJK) &
+ * sngl(cos_theta * d_ln_density_dr_table(int_radius))
else
! Cartesian components of the gravitational acceleration
! integrate and multiply by rho / Kappa
- temp_gxl(ijk,1,1) = sin_theta*cos_phi
- temp_gyl(ijk,1,1) = sin_theta*sin_phi
- temp_gzl(ijk,1,1) = cos_theta
+ temp_gxl(INDEX_IJK) = sin_theta*cos_phi
+ temp_gyl(INDEX_IJK) = sin_theta*sin_phi
+ temp_gzl(INDEX_IJK) = cos_theta
endif
- enddo
-#else
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- iglob = ibool(i,j,k,ispec)
- ! stores "displacement"
- dummyx_loc(i,j,k) = displfluid(iglob)
+ ENDDO_LOOP_IJK
- ! pre-computes factors
- ! use mesh coordinates to get theta and phi
- ! x y z contain r theta phi
- radius = dble(xstore(iglob))
- theta = dble(ystore(iglob))
- phi = dble(zstore(iglob))
-
- cos_theta = dcos(theta)
- sin_theta = dsin(theta)
- cos_phi = dcos(phi)
- sin_phi = dsin(phi)
-
- int_radius = nint(radius * R_EARTH_KM * 10.d0)
-
- if( .not. GRAVITY_VAL ) then
- ! grad(rho)/rho in Cartesian components
- displ_times_grad_x_ln_rho(i,j,k) = dummyx_loc(i,j,k) &
- * sngl(sin_theta * cos_phi * d_ln_density_dr_table(int_radius))
- displ_times_grad_y_ln_rho(i,j,k) = dummyx_loc(i,j,k) &
- * sngl(sin_theta * sin_phi * d_ln_density_dr_table(int_radius))
- displ_times_grad_z_ln_rho(i,j,k) = dummyx_loc(i,j,k) &
- * sngl(cos_theta * d_ln_density_dr_table(int_radius))
- else
- ! Cartesian components of the gravitational acceleration
- ! integrate and multiply by rho / Kappa
- temp_gxl(i,j,k) = sin_theta*cos_phi
- temp_gyl(i,j,k) = sin_theta*sin_phi
- temp_gzl(i,j,k) = cos_theta
- endif
-
- enddo
- enddo
- enddo
-#endif
-
! subroutines adapted from Deville, Fischer and Mund, High-order methods
! for incompressible fluid flow, Cambridge University Press (2002),
! pages 386 and 389 and Figure 8.3.1
@@ -271,151 +230,146 @@
enddo
enddo
+ DO_LOOP_IJK
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
+ ! get derivatives of velocity potential with respect to x, y and z
+ xixl = xix(INDEX_IJK,ispec)
+ xiyl = xiy(INDEX_IJK,ispec)
+ xizl = xiz(INDEX_IJK,ispec)
+ etaxl = etax(INDEX_IJK,ispec)
+ etayl = etay(INDEX_IJK,ispec)
+ etazl = etaz(INDEX_IJK,ispec)
+ gammaxl = gammax(INDEX_IJK,ispec)
+ gammayl = gammay(INDEX_IJK,ispec)
+ gammazl = gammaz(INDEX_IJK,ispec)
- ! get derivatives of velocity potential with respect to x, y and z
- xixl = xix(i,j,k,ispec)
- xiyl = xiy(i,j,k,ispec)
- xizl = xiz(i,j,k,ispec)
- etaxl = etax(i,j,k,ispec)
- etayl = etay(i,j,k,ispec)
- etazl = etaz(i,j,k,ispec)
- gammaxl = gammax(i,j,k,ispec)
- gammayl = gammay(i,j,k,ispec)
- gammazl = gammaz(i,j,k,ispec)
+ ! compute the jacobian
+ jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
+ - xiyl*(etaxl*gammazl-etazl*gammaxl) &
+ + xizl*(etaxl*gammayl-etayl*gammaxl))
- ! compute the jacobian
- jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
- - xiyl*(etaxl*gammazl-etazl*gammaxl) &
- + xizl*(etaxl*gammayl-etayl*gammaxl))
+ dpotentialdxl = xixl*tempx1(INDEX_IJK) + etaxl*tempx2(INDEX_IJK) + gammaxl*tempx3(INDEX_IJK)
+ dpotentialdyl = xiyl*tempx1(INDEX_IJK) + etayl*tempx2(INDEX_IJK) + gammayl*tempx3(INDEX_IJK)
+ dpotentialdzl = xizl*tempx1(INDEX_IJK) + etazl*tempx2(INDEX_IJK) + gammazl*tempx3(INDEX_IJK)
- dpotentialdxl = xixl*tempx1(i,j,k) + etaxl*tempx2(i,j,k) + gammaxl*tempx3(i,j,k)
- dpotentialdyl = xiyl*tempx1(i,j,k) + etayl*tempx2(i,j,k) + gammayl*tempx3(i,j,k)
- dpotentialdzl = xizl*tempx1(i,j,k) + etazl*tempx2(i,j,k) + gammazl*tempx3(i,j,k)
+ ! compute contribution of rotation and add to gradient of potential
+ ! this term has no Z component
+ if(ROTATION_VAL) then
- ! compute contribution of rotation and add to gradient of potential
- ! this term has no Z component
- if(ROTATION_VAL) then
+ ! store the source for the Euler scheme for A_rotation and B_rotation
+ two_omega_deltat = deltat * two_omega_earth
- ! store the source for the Euler scheme for A_rotation and B_rotation
- two_omega_deltat = deltat * two_omega_earth
+ cos_two_omega_t = cos(two_omega_earth*time)
+ sin_two_omega_t = sin(two_omega_earth*time)
- cos_two_omega_t = cos(two_omega_earth*time)
- sin_two_omega_t = sin(two_omega_earth*time)
+ ! time step deltat of Euler scheme is included in the source
+ source_euler_A(INDEX_IJK) = two_omega_deltat &
+ * (cos_two_omega_t * dpotentialdyl + sin_two_omega_t * dpotentialdxl)
+ source_euler_B(INDEX_IJK) = two_omega_deltat &
+ * (sin_two_omega_t * dpotentialdyl - cos_two_omega_t * dpotentialdxl)
- ! time step deltat of Euler scheme is included in the source
- source_euler_A(i,j,k) = two_omega_deltat &
- * (cos_two_omega_t * dpotentialdyl + sin_two_omega_t * dpotentialdxl)
- source_euler_B(i,j,k) = two_omega_deltat &
- * (sin_two_omega_t * dpotentialdyl - cos_two_omega_t * dpotentialdxl)
+ A_rotation = A_array_rotation(INDEX_IJK,ispec)
+ B_rotation = B_array_rotation(INDEX_IJK,ispec)
- A_rotation = A_array_rotation(i,j,k,ispec)
- B_rotation = B_array_rotation(i,j,k,ispec)
+ ux_rotation = A_rotation*cos_two_omega_t + B_rotation*sin_two_omega_t
+ uy_rotation = - A_rotation*sin_two_omega_t + B_rotation*cos_two_omega_t
- ux_rotation = A_rotation*cos_two_omega_t + B_rotation*sin_two_omega_t
- uy_rotation = - A_rotation*sin_two_omega_t + B_rotation*cos_two_omega_t
+ dpotentialdx_with_rot = dpotentialdxl + ux_rotation
+ dpotentialdy_with_rot = dpotentialdyl + uy_rotation
- dpotentialdx_with_rot = dpotentialdxl + ux_rotation
- dpotentialdy_with_rot = dpotentialdyl + uy_rotation
+ else
- else
+ dpotentialdx_with_rot = dpotentialdxl
+ dpotentialdy_with_rot = dpotentialdyl
- dpotentialdx_with_rot = dpotentialdxl
- dpotentialdy_with_rot = dpotentialdyl
+ endif ! end of section with rotation
- endif ! end of section with rotation
+ ! add (chi/rho)grad(rho) term in no gravity case
+ if(.not. GRAVITY_VAL) then
- ! add (chi/rho)grad(rho) term in no gravity case
- if(.not. GRAVITY_VAL) then
+ ! With regards to the non-gravitating case: we cannot set N^2 = 0 *and* let g = 0.
+ ! We can *either* assume N^2 = 0 but keep gravity g, *or* we can assume that gravity
+ ! is negligible to begin with, as in our GJI 2002a, in which case N does not arise.
+ ! We get:
+ !
+ ! \ddot\chi = \rho^{-1}\kappa\bdel\cdot(\bdel\chi+\chi\bdel\ln\rho)
+ !
+ ! Then the displacement is
+ !
+ ! \bu = \bdel\chi+\chi\bdel\ln\rho = \rho^{-1}\bdel(\rho\chi)
+ !
+ ! and the pressure is
+ !
+ ! p = -\rho\ddot{\chi}
+ !
+ ! Thus in our 2002b GJI paper eqn (21) is wrong, and equation (41)
+ ! in our AGU monograph is incorrect; these equations should be replaced by
+ !
+ ! \ddot\chi = \rho^{-1}\kappa\bdel\cdot(\bdel\chi+\chi\bdel\ln\rho)
+ !
+ ! Note that the fluid potential we use in GJI 2002a differs from the one used here:
+ !
+ ! \chi_GJI2002a = \rho\partial\t\chi
+ !
+ ! such that
+ !
+ ! \bv = \partial_t\bu=\rho^{-1}\bdel\chi_GJI2002a (GJI 2002a eqn 20)
+ !
+ ! p = - \partial_t\chi_GJI2002a (GJI 2002a eqn 19)
- ! With regards to the non-gravitating case: we cannot set N^2 = 0 *and* let g = 0.
- ! We can *either* assume N^2 = 0 but keep gravity g, *or* we can assume that gravity
- ! is negligible to begin with, as in our GJI 2002a, in which case N does not arise.
- ! We get:
- !
- ! \ddot\chi = \rho^{-1}\kappa\bdel\cdot(\bdel\chi+\chi\bdel\ln\rho)
- !
- ! Then the displacement is
- !
- ! \bu = \bdel\chi+\chi\bdel\ln\rho = \rho^{-1}\bdel(\rho\chi)
- !
- ! and the pressure is
- !
- ! p = -\rho\ddot{\chi}
- !
- ! Thus in our 2002b GJI paper eqn (21) is wrong, and equation (41)
- ! in our AGU monograph is incorrect; these equations should be replaced by
- !
- ! \ddot\chi = \rho^{-1}\kappa\bdel\cdot(\bdel\chi+\chi\bdel\ln\rho)
- !
- ! Note that the fluid potential we use in GJI 2002a differs from the one used here:
- !
- ! \chi_GJI2002a = \rho\partial\t\chi
- !
- ! such that
- !
- ! \bv = \partial_t\bu=\rho^{-1}\bdel\chi_GJI2002a (GJI 2002a eqn 20)
- !
- ! p = - \partial_t\chi_GJI2002a (GJI 2002a eqn 19)
+ ! use mesh coordinates to get theta and phi
+ ! x y z contain r theta phi
+ dpotentialdx_with_rot = dpotentialdx_with_rot + displ_times_grad_x_ln_rho(INDEX_IJK)
+ dpotentialdy_with_rot = dpotentialdy_with_rot + displ_times_grad_y_ln_rho(INDEX_IJK)
+ dpotentialdzl = dpotentialdzl + displ_times_grad_z_ln_rho(INDEX_IJK)
- ! use mesh coordinates to get theta and phi
- ! x y z contain r theta phi
- dpotentialdx_with_rot = dpotentialdx_with_rot + displ_times_grad_x_ln_rho(i,j,k)
- dpotentialdy_with_rot = dpotentialdy_with_rot + displ_times_grad_y_ln_rho(i,j,k)
- dpotentialdzl = dpotentialdzl + displ_times_grad_z_ln_rho(i,j,k)
+ else
+ ! if gravity is turned on
- else
- ! if gravity is turned on
+ ! compute divergence of displacment
+ gxl = temp_gxl(INDEX_IJK)
+ gyl = temp_gyl(INDEX_IJK)
+ gzl = temp_gzl(INDEX_IJK)
- ! compute divergence of displacment
- gxl = temp_gxl(i,j,k)
- gyl = temp_gyl(i,j,k)
- gzl = temp_gzl(i,j,k)
+ ! distinguish between single and double precision for reals
+ if(CUSTOM_REAL == SIZE_REAL) then
+ gravity_term(INDEX_IJK) = &
+ sngl( minus_rho_g_over_kappa_fluid(int_radius) &
+ * dble(jacobianl) * wgll_cube(INDEX_IJK) &
+ * (dble(dpotentialdx_with_rot) * gxl &
+ + dble(dpotentialdy_with_rot) * gyl &
+ + dble(dpotentialdzl) * gzl) )
+ else
+ gravity_term(INDEX_IJK) = minus_rho_g_over_kappa_fluid(int_radius) * &
+ jacobianl * wgll_cube(INDEX_IJK) &
+ * (dpotentialdx_with_rot * gxl &
+ + dpotentialdy_with_rot * gyl &
+ + dpotentialdzl * gzl)
+ endif
- ! distinguish between single and double precision for reals
- if(CUSTOM_REAL == SIZE_REAL) then
- gravity_term(i,j,k) = &
- sngl( minus_rho_g_over_kappa_fluid(int_radius) &
- * dble(jacobianl) * wgll_cube(i,j,k) &
- * (dble(dpotentialdx_with_rot) * gxl &
- + dble(dpotentialdy_with_rot) * gyl &
- + dble(dpotentialdzl) * gzl) )
- else
- gravity_term(i,j,k) = minus_rho_g_over_kappa_fluid(int_radius) * &
- jacobianl * wgll_cube(i,j,k) &
- * (dpotentialdx_with_rot * gxl &
- + dpotentialdy_with_rot * gyl &
- + dpotentialdzl * gzl)
- endif
+ ! divergence of displacement field with gravity on
+ ! note: these calculations are only considered for SIMULATION_TYPE == 1 .and. SAVE_FORWARD
+ ! and one has set MOVIE_VOLUME_TYPE == 4 when MOVIE_VOLUME is .true.;
+ ! in case of SIMULATION_TYPE == 3, it gets overwritten by compute_kernels_outer_core()
+ if( MOVIE_VOLUME .and. NSPEC_OUTER_CORE_3DMOVIE /= 1 ) then
+ div_displfluid(INDEX_IJK,ispec) = &
+ minus_rho_g_over_kappa_fluid(int_radius) &
+ * (dpotentialdx_with_rot * gxl &
+ + dpotentialdy_with_rot * gyl &
+ + dpotentialdzl * gzl)
+ endif
- ! divergence of displacement field with gravity on
- ! note: these calculations are only considered for SIMULATION_TYPE == 1 .and. SAVE_FORWARD
- ! and one has set MOVIE_VOLUME_TYPE == 4 when MOVIE_VOLUME is .true.;
- ! in case of SIMULATION_TYPE == 3, it gets overwritten by compute_kernels_outer_core()
- if( MOVIE_VOLUME .and. NSPEC_OUTER_CORE_3DMOVIE /= 1 ) then
- div_displfluid(i,j,k,ispec) = &
- minus_rho_g_over_kappa_fluid(int_radius) &
- * (dpotentialdx_with_rot * gxl &
- + dpotentialdy_with_rot * gyl &
- + dpotentialdzl * gzl)
- endif
+ endif
- endif
+ tempx1(INDEX_IJK) = jacobianl*(xixl*dpotentialdx_with_rot &
+ + xiyl*dpotentialdy_with_rot + xizl*dpotentialdzl)
+ tempx2(INDEX_IJK) = jacobianl*(etaxl*dpotentialdx_with_rot &
+ + etayl*dpotentialdy_with_rot + etazl*dpotentialdzl)
+ tempx3(INDEX_IJK) = jacobianl*(gammaxl*dpotentialdx_with_rot &
+ + gammayl*dpotentialdy_with_rot + gammazl*dpotentialdzl)
- tempx1(i,j,k) = jacobianl*(xixl*dpotentialdx_with_rot &
- + xiyl*dpotentialdy_with_rot + xizl*dpotentialdzl)
- tempx2(i,j,k) = jacobianl*(etaxl*dpotentialdx_with_rot &
- + etayl*dpotentialdy_with_rot + etazl*dpotentialdzl)
- tempx3(i,j,k) = jacobianl*(gammaxl*dpotentialdx_with_rot &
- + gammayl*dpotentialdy_with_rot + gammazl*dpotentialdzl)
+ ENDDO_LOOP_IJK
- enddo
- enddo
- enddo
-
! subroutines adapted from Deville, Fischer and Mund, High-order methods
! for incompressible fluid flow, Cambridge University Press (2002),
! pages 386 and 389 and Figure 8.3.1
@@ -452,17 +406,29 @@
enddo
! sum contributions from each element to the global mesh and add gravity term
-#ifdef FORCE_VECTORIZATION
- do ijk=1,NGLLCUBE
- sum_terms(ijk,1,1) = - (wgllwgll_yz_3D(ijk,1,1)*newtempx1(ijk,1,1) &
- + wgllwgll_xz_3D(ijk,1,1)*newtempx2(ijk,1,1) &
- + wgllwgll_xy_3D(ijk,1,1)*newtempx3(ijk,1,1))
- enddo
+
+ DO_LOOP_IJK
+
+ sum_terms(INDEX_IJK) = - ( wgllwgll_yz_3D(INDEX_IJK)*newtempx1(INDEX_IJK) &
+ + wgllwgll_xz_3D(INDEX_IJK)*newtempx2(INDEX_IJK) &
+ + wgllwgll_xy_3D(INDEX_IJK)*newtempx3(INDEX_IJK))
+
+ ENDDO_LOOP_IJK
+
+ ! adds gravity
if(GRAVITY_VAL) then
- do ijk = 1,NGLLCUBE
- sum_terms(ijk,1,1) = sum_terms(ijk,1,1) + gravity_term(ijk,1,1)
- enddo
+
+ DO_LOOP_IJK
+
+ sum_terms(INDEX_IJK) = sum_terms(INDEX_IJK) + gravity_term(INDEX_IJK)
+
+ ENDDO_LOOP_IJK
+
endif
+
+ ! updates acceleration
+
+#ifdef FORCE_VECTORIZATION
! we can force vectorization using a compiler directive here because we know that there is no dependency
! inside a given spectral element, since all the global points of a local elements are different by definition
! (only common points between different elements can be the same)
@@ -471,63 +437,54 @@
!pgi$ ivdep
!DIR$ IVDEP
do ijk = 1,NGLLCUBE
- iglob = ibool(ijk,1,1,ispec)
- accelfluid(iglob) = accelfluid(iglob) + sum_terms(ijk,1,1)
- enddo
- ! update rotation term with Euler scheme
- if(ROTATION_VAL) then
- if(USE_LDDRK) then
- ! use the source saved above
- do ijk = 1,NGLLCUBE
- A_array_rotation_lddrk(ijk,1,1,ispec) = ALPHA_LDDRK(istage) * A_array_rotation_lddrk(ijk,1,1,ispec) &
- + source_euler_A(ijk,1,1)
- A_array_rotation(ijk,1,1,ispec) = A_array_rotation(ijk,1,1,ispec) &
- + BETA_LDDRK(istage) * A_array_rotation_lddrk(ijk,1,1,ispec)
-
- B_array_rotation_lddrk(ijk,1,1,ispec) = ALPHA_LDDRK(istage) * B_array_rotation_lddrk(ijk,1,1,ispec) &
- + source_euler_B(ijk,1,1)
- B_array_rotation(ijk,1,1,ispec) = B_array_rotation(ijk,1,1,ispec) &
- + BETA_LDDRK(istage) * B_array_rotation_lddrk(ijk,1,1,ispec)
- enddo
- else
- do ijk = 1,NGLLCUBE
- A_array_rotation(ijk,1,1,ispec) = A_array_rotation(ijk,1,1,ispec) + source_euler_A(ijk,1,1)
- B_array_rotation(ijk,1,1,ispec) = B_array_rotation(ijk,1,1,ispec) + source_euler_B(ijk,1,1)
- enddo
- endif
- endif
#else
- ! sum contributions from each element to the global mesh and add gravity term
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
- sum_terms = - (wgllwgll_yz(j,k)*newtempx1(i,j,k) &
- + wgllwgll_xz(i,k)*newtempx2(i,j,k) &
- + wgllwgll_xy(i,j)*newtempx3(i,j,k))
+#endif
+ iglob = ibool(INDEX_IJK,ispec)
+ accelfluid(iglob) = accelfluid(iglob) + sum_terms(INDEX_IJK)
- if(GRAVITY_VAL) sum_terms = sum_terms + gravity_term(i,j,k)
-
- iglob = ibool(i,j,k,ispec)
- accelfluid(iglob) = accelfluid(iglob) + sum_terms
+#ifdef FORCE_VECTORIZATION
+ enddo
+#else
enddo
enddo
enddo
+#endif
+
! update rotation term with Euler scheme
+
if(ROTATION_VAL) then
+
if(USE_LDDRK) then
+
! use the source saved above
- A_array_rotation_lddrk(:,:,:,ispec) = ALPHA_LDDRK(istage) * A_array_rotation_lddrk(:,:,:,ispec) + source_euler_A(:,:,:)
- A_array_rotation(:,:,:,ispec) = A_array_rotation(:,:,:,ispec) + BETA_LDDRK(istage) * A_array_rotation_lddrk(:,:,:,ispec)
+ DO_LOOP_IJK
- B_array_rotation_lddrk(:,:,:,ispec) = ALPHA_LDDRK(istage) * B_array_rotation_lddrk(:,:,:,ispec) + source_euler_B(:,:,:)
- B_array_rotation(:,:,:,ispec) = B_array_rotation(:,:,:,ispec) + BETA_LDDRK(istage) * B_array_rotation_lddrk(:,:,:,ispec)
+ A_array_rotation_lddrk(INDEX_IJK,ispec) = ALPHA_LDDRK(istage) * A_array_rotation_lddrk(INDEX_IJK,ispec) &
+ + source_euler_A(INDEX_IJK)
+ A_array_rotation(INDEX_IJK,ispec) = A_array_rotation(INDEX_IJK,ispec) &
+ + BETA_LDDRK(istage) * A_array_rotation_lddrk(INDEX_IJK,ispec)
+
+ B_array_rotation_lddrk(INDEX_IJK,ispec) = ALPHA_LDDRK(istage) * B_array_rotation_lddrk(INDEX_IJK,ispec) &
+ + source_euler_B(INDEX_IJK)
+ B_array_rotation(INDEX_IJK,ispec) = B_array_rotation(INDEX_IJK,ispec) &
+ + BETA_LDDRK(istage) * B_array_rotation_lddrk(INDEX_IJK,ispec)
+
+ ENDDO_LOOP_IJK
+
else
- ! use the source saved above
- A_array_rotation(:,:,:,ispec) = A_array_rotation(:,:,:,ispec) + source_euler_A(:,:,:)
- B_array_rotation(:,:,:,ispec) = B_array_rotation(:,:,:,ispec) + source_euler_B(:,:,:)
+
+ DO_LOOP_IJK
+
+ A_array_rotation(INDEX_IJK,ispec) = A_array_rotation(INDEX_IJK,ispec) + source_euler_A(INDEX_IJK)
+ B_array_rotation(INDEX_IJK,ispec) = B_array_rotation(INDEX_IJK,ispec) + source_euler_B(INDEX_IJK)
+
+ ENDDO_LOOP_IJK
+
endif
endif
-#endif
enddo ! spectral element loop
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90 2013-10-23 13:03:00 UTC (rev 22970)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90 2013-10-23 14:27:26 UTC (rev 22971)
@@ -85,7 +85,6 @@
NSPEC_CRUST_MANTLE_ATTENUATION, &
deltat, &
displ_crust_mantle, &
-! veloc_crust_mantle, &
accel_crust_mantle, &
phase_is_inner, &
R_xx_crust_mantle,R_yy_crust_mantle,R_xy_crust_mantle, &
@@ -96,13 +95,12 @@
epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle, &
eps_trace_over_3_crust_mantle, &
alphaval,betaval,gammaval, &
- factor_common_crust_mantle,ATT4_VAL, .false. )
+ factor_common_crust_mantle,ATT4_VAL)
! inner core region
call compute_forces_inner_core_Dev(NSPEC_INNER_CORE_STR_OR_ATT,NGLOB_INNER_CORE, &
NSPEC_INNER_CORE_ATTENUATION, &
deltat, &
displ_inner_core, &
-! veloc_inner_core, &
accel_inner_core, &
phase_is_inner, &
R_xx_inner_core,R_yy_inner_core,R_xy_inner_core, &
@@ -113,7 +111,7 @@
epsilondev_xz_inner_core,epsilondev_yz_inner_core, &
eps_trace_over_3_inner_core,&
alphaval,betaval,gammaval, &
- factor_common_inner_core,ATT5_VAL) !!!! , .false. )
+ factor_common_inner_core,ATT5_VAL)
else
! no Deville optimization
! crust/mantle region
@@ -121,7 +119,6 @@
NSPEC_CRUST_MANTLE_ATTENUATION, &
deltat, &
displ_crust_mantle, &
-! veloc_crust_mantle, &
accel_crust_mantle, &
phase_is_inner, &
R_xx_crust_mantle,R_yy_crust_mantle,R_xy_crust_mantle, &
@@ -132,13 +129,12 @@
epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle, &
eps_trace_over_3_crust_mantle, &
alphaval,betaval,gammaval, &
- factor_common_crust_mantle,ATT4_VAL) !!!!!!!!!!!! , .false. )
+ factor_common_crust_mantle,ATT4_VAL)
! inner core region
call compute_forces_inner_core(NSPEC_INNER_CORE_STR_OR_ATT,NGLOB_INNER_CORE, &
NSPEC_INNER_CORE_ATTENUATION, &
deltat, &
displ_inner_core, &
-! veloc_inner_core, &
accel_inner_core, &
phase_is_inner, &
R_xx_inner_core,R_yy_inner_core,R_xy_inner_core, &
@@ -149,7 +145,7 @@
epsilondev_xz_inner_core,epsilondev_yz_inner_core, &
eps_trace_over_3_inner_core,&
alphaval,betaval,gammaval, &
- factor_common_inner_core,ATT5_VAL) !!!!!!!!!!! , .false. )
+ factor_common_inner_core,ATT5_VAL)
endif
else
! on GPU
@@ -472,7 +468,6 @@
NSPEC_CRUST_MANTLE_STR_AND_ATT, &
b_deltat, &
b_displ_crust_mantle, &
-! b_veloc_crust_mantle, &
b_accel_crust_mantle, &
phase_is_inner, &
b_R_xx_crust_mantle,b_R_yy_crust_mantle,b_R_xy_crust_mantle, &
@@ -484,13 +479,12 @@
b_epsilondev_xz_crust_mantle,b_epsilondev_yz_crust_mantle, &
b_eps_trace_over_3_crust_mantle, &
b_alphaval,b_betaval,b_gammaval, &
- factor_common_crust_mantle,ATT4_VAL, .true. )
+ factor_common_crust_mantle,ATT4_VAL)
! inner core region
call compute_forces_inner_core_Dev(NSPEC_INNER_CORE_ADJOINT,NGLOB_INNER_CORE_ADJOINT, &
NSPEC_INNER_CORE_STR_AND_ATT, &
b_deltat, &
b_displ_inner_core, &
-! b_veloc_inner_core, &
b_accel_inner_core, &
phase_is_inner, &
b_R_xx_inner_core,b_R_yy_inner_core,b_R_xy_inner_core, &
@@ -501,7 +495,7 @@
b_epsilondev_xz_inner_core,b_epsilondev_yz_inner_core, &
b_eps_trace_over_3_inner_core,&
b_alphaval,b_betaval,b_gammaval, &
- factor_common_inner_core,ATT5_VAL) !!!! , .true. )
+ factor_common_inner_core,ATT5_VAL)
else
! no Deville optimization
! crust/mantle region
@@ -509,7 +503,6 @@
NSPEC_CRUST_MANTLE_STR_AND_ATT, &
b_deltat, &
b_displ_crust_mantle, &
-! b_veloc_crust_mantle, &
b_accel_crust_mantle, &
phase_is_inner, &
b_R_xx_crust_mantle,b_R_yy_crust_mantle,b_R_xy_crust_mantle, &
@@ -521,13 +514,12 @@
b_epsilondev_xz_crust_mantle,b_epsilondev_yz_crust_mantle, &
b_eps_trace_over_3_crust_mantle, &
b_alphaval,b_betaval,b_gammaval, &
- factor_common_crust_mantle,ATT4_VAL) !!!!!!!!!!!! , .true. )
+ factor_common_crust_mantle,ATT4_VAL)
! inner core region
call compute_forces_inner_core(NSPEC_INNER_CORE_ADJOINT,NGLOB_INNER_CORE_ADJOINT, &
NSPEC_INNER_CORE_STR_AND_ATT, &
b_deltat, &
b_displ_inner_core, &
-! b_veloc_inner_core, &
b_accel_inner_core, &
phase_is_inner, &
b_R_xx_inner_core,b_R_yy_inner_core,b_R_xy_inner_core, &
@@ -538,7 +530,7 @@
b_epsilondev_xz_inner_core,b_epsilondev_yz_inner_core, &
b_eps_trace_over_3_inner_core,&
b_alphaval,b_betaval,b_gammaval, &
- factor_common_inner_core,ATT5_VAL) !!!!!!!!!!! , .true. )
+ factor_common_inner_core,ATT5_VAL)
endif
else
! on GPU
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/setup_GLL_points.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/setup_GLL_points.f90 2013-10-23 13:03:00 UTC (rev 22970)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/setup_GLL_points.f90 2013-10-23 14:27:26 UTC (rev 22971)
@@ -40,18 +40,16 @@
hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube)
- ! define a 3D extension in order to be able to force vectorization in the compute_forces routines
- if( FORCE_VECTORIZATION_VAL ) then
- do k = 1,NGLLZ
- do j = 1,NGLLY
- do i = 1,NGLLX
- wgllwgll_yz_3D(i,j,k) = wgllwgll_yz(j,k)
- wgllwgll_xz_3D(i,j,k) = wgllwgll_xz(i,k)
- wgllwgll_xy_3D(i,j,k) = wgllwgll_xy(i,j)
- enddo
+ ! define a 3D extension in order to be able to force vectorization in the compute_forces_**_Dev routines
+ do k = 1,NGLLZ
+ do j = 1,NGLLY
+ do i = 1,NGLLX
+ wgllwgll_yz_3D(i,j,k) = wgllwgll_yz(j,k)
+ wgllwgll_xz_3D(i,j,k) = wgllwgll_xz(i,k)
+ wgllwgll_xy_3D(i,j,k) = wgllwgll_xy(i,j)
enddo
enddo
- endif
+ enddo
if( USE_DEVILLE_PRODUCTS_VAL ) then
! check that optimized routines from Deville et al. (2002) can be used
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/setup_sources_receivers.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/setup_sources_receivers.f90 2013-10-23 13:03:00 UTC (rev 22970)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/setup_sources_receivers.f90 2013-10-23 14:27:26 UTC (rev 22971)
@@ -365,7 +365,7 @@
! count number of receivers located in this slice
nrec_local = 0
if (SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) then
- ! note: for 1-chunk simulations, nrec is now the actual number of receivers found in this chunk
+ ! note: for 1-chunk simulations, nrec is now the actual number of receivers found in this chunk
! (excludes stations located outside of chunk)
nrec_simulation = nrec
do irec = 1,nrec
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/specfem3D_par.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/specfem3D_par.F90 2013-10-23 13:03:00 UTC (rev 22970)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/specfem3D_par.F90 2013-10-23 14:27:26 UTC (rev 22971)
@@ -68,11 +68,12 @@
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xxT,hprimewgll_xxT
real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLY) :: hprime_yy,hprimewgll_yy
real(kind=CUSTOM_REAL), dimension(NGLLZ,NGLLZ) :: hprime_zz,hprimewgll_zz
+
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
- ! arrays for force_vectorization
+ ! arrays for Deville and force_vectorization
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: wgllwgll_xy_3D,wgllwgll_xz_3D,wgllwgll_yz_3D
!-----------------------------------------------------------------
More information about the CIG-COMMITS
mailing list