[cig-commits] r22746 - in seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER: setup src/cuda src/shared src/specfem3D
danielpeter at geodynamics.org
Fri Aug 30 05:45:45 PDT 2013
Author: danielpeter
Date: 2013-08-30 05:45:44 -0700 (Fri, 30 Aug 2013)
New Revision: 22746
Added:
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core_Dev.F90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_mesh_databases.F90
Removed:
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core_Dev.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_mesh_databases.f90
Modified:
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/setup/constants.h.in
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_coupling_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_outer_core_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/parallel.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/read_parameter_file.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_add_sources.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_boundary_kernel.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_coupling.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_acoustic_calling_routine.F90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle_Dev.F90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle_noDev.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core_Dev.F90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core_noDev.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_kernels.F90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_crust_mantle.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_outer_core.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/finalize_simulation.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/iterate_time.F90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_regular_points.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_sources.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/rules.mk
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/save_regular_kernels.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/setup_GLL_points.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/update_displacement_Newmark.f90
Log:
separates forward and backward routines; renames file endings of read_mesh_databases.f90 and compute_forces_outer_core_Dev.f90 to .F90 so they can use preprocessor directives; adds regular kernel routines
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/setup/constants.h.in
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/setup/constants.h.in 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/setup/constants.h.in 2013-08-30 12:45:44 UTC (rev 22746)
@@ -277,12 +277,10 @@
! old version
! old version 5.1.5 uses full 3d attenuation arrays (set to .true.), custom_real for attenuation arrays (Qmu_store, tau_e_store)
logical, parameter :: USE_VERSION_5_1_5 = .true.
-! logical, parameter :: USE_3D_ATTENUATION_ARRAYS = .true.
! integer, parameter :: CUSTOM_REAL_ATT = CUSTOM_REAL
! new version
! new version uses full 3d attenuation, double precision for attenuation arrays (Qmu_store, tau_e_store)
!! logical, parameter :: USE_VERSION_5_1_5 = .false.
-!! logical, parameter :: USE_3D_ATTENUATION_ARRAYS = .true.
!! integer,parameter :: CUSTOM_REAL_ATT = SIZE_DOUBLE
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_coupling_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_coupling_cuda.cu 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_coupling_cuda.cu 2013-08-30 12:45:44 UTC (rev 22746)
@@ -172,7 +172,8 @@
extern "C"
void FC_FUNC_(compute_coupling_fluid_cmb_cuda,
- COMPUTE_COUPLING_FLUID_CMB_CUDA)(long* Mesh_pointer_f) {
+ COMPUTE_COUPLING_FLUID_CMB_CUDA)(long* Mesh_pointer_f,
+ int* FORWARD_OR_ADJOINT) {
TRACE("compute_coupling_fluid_cmb_cuda");
//double start_time = get_time();
@@ -190,19 +191,19 @@
dim3 threads(5,5,1);
// launches GPU kernel
- compute_coupling_fluid_CMB_kernel<<<grid,threads>>>(mp->d_displ_crust_mantle,
- mp->d_accel_outer_core,
- mp->d_ibool_crust_mantle,
- mp->d_ibelm_bottom_crust_mantle,
- mp->d_normal_top_outer_core,
- mp->d_jacobian2D_top_outer_core,
- mp->d_wgllwgll_xy,
- mp->d_ibool_outer_core,
- mp->d_ibelm_top_outer_core,
- mp->nspec2D_top_outer_core);
-
- // adjoint simulations
- if ( mp->simulation_type == 3 ){
+ if( *FORWARD_OR_ADJOINT == 1 ){
+ compute_coupling_fluid_CMB_kernel<<<grid,threads>>>(mp->d_displ_crust_mantle,
+ mp->d_accel_outer_core,
+ mp->d_ibool_crust_mantle,
+ mp->d_ibelm_bottom_crust_mantle,
+ mp->d_normal_top_outer_core,
+ mp->d_jacobian2D_top_outer_core,
+ mp->d_wgllwgll_xy,
+ mp->d_ibool_outer_core,
+ mp->d_ibelm_top_outer_core,
+ mp->nspec2D_top_outer_core);
+ }else if( *FORWARD_OR_ADJOINT == 3 ){
+ // adjoint simulations
compute_coupling_fluid_CMB_kernel<<<grid,threads>>>(mp->d_b_displ_crust_mantle,
mp->d_b_accel_outer_core,
mp->d_ibool_crust_mantle,
@@ -226,7 +227,8 @@
extern "C"
void FC_FUNC_(compute_coupling_fluid_icb_cuda,
- COMPUTE_COUPLING_FLUID_ICB_CUDA)(long* Mesh_pointer_f) {
+ COMPUTE_COUPLING_FLUID_ICB_CUDA)(long* Mesh_pointer_f,
+ int* FORWARD_OR_ADJOINT) {
TRACE("compute_coupling_fluid_icb_cuda");
//double start_time = get_time();
@@ -244,19 +246,19 @@
dim3 threads(5,5,1);
// launches GPU kernel
- compute_coupling_fluid_ICB_kernel<<<grid,threads>>>(mp->d_displ_inner_core,
- mp->d_accel_outer_core,
- mp->d_ibool_inner_core,
- mp->d_ibelm_top_inner_core,
- mp->d_normal_bottom_outer_core,
- mp->d_jacobian2D_bottom_outer_core,
- mp->d_wgllwgll_xy,
- mp->d_ibool_outer_core,
- mp->d_ibelm_bottom_outer_core,
- mp->nspec2D_bottom_outer_core);
-
- // adjoint simulations
- if ( mp->simulation_type == 3 ){
+ if( *FORWARD_OR_ADJOINT == 1 ){
+ compute_coupling_fluid_ICB_kernel<<<grid,threads>>>(mp->d_displ_inner_core,
+ mp->d_accel_outer_core,
+ mp->d_ibool_inner_core,
+ mp->d_ibelm_top_inner_core,
+ mp->d_normal_bottom_outer_core,
+ mp->d_jacobian2D_bottom_outer_core,
+ mp->d_wgllwgll_xy,
+ mp->d_ibool_outer_core,
+ mp->d_ibelm_bottom_outer_core,
+ mp->nspec2D_bottom_outer_core);
+ }else if( *FORWARD_OR_ADJOINT == 3 ){
+ // adjoint simulations
compute_coupling_fluid_ICB_kernel<<<grid,threads>>>(mp->d_b_displ_inner_core,
mp->d_b_accel_outer_core,
mp->d_ibool_inner_core,
@@ -268,6 +270,7 @@
mp->d_ibelm_bottom_outer_core,
mp->nspec2D_bottom_outer_core);
}
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
//double end_time = get_time();
//printf("Elapsed time: %e\n",end_time-start_time);
@@ -420,7 +423,8 @@
extern "C"
void FC_FUNC_(compute_coupling_cmb_fluid_cuda,
- COMPUTE_COUPLING_CMB_FLUID_CUDA)(long* Mesh_pointer_f) {
+ COMPUTE_COUPLING_CMB_FLUID_CUDA)(long* Mesh_pointer_f,
+ int* FORWARD_OR_ADJOINT) {
TRACE("compute_coupling_cmb_fluid_cuda");
//double start_time = get_time();
@@ -438,23 +442,23 @@
dim3 threads(5,5,1);
// launches GPU kernel
- compute_coupling_CMB_fluid_kernel<<<grid,threads>>>(mp->d_displ_crust_mantle,
- mp->d_accel_crust_mantle,
- mp->d_accel_outer_core,
- mp->d_ibool_crust_mantle,
- mp->d_ibelm_bottom_crust_mantle,
- mp->d_normal_top_outer_core,
- mp->d_jacobian2D_top_outer_core,
- mp->d_wgllwgll_xy,
- mp->d_ibool_outer_core,
- mp->d_ibelm_top_outer_core,
- mp->RHO_TOP_OC,
- mp->minus_g_cmb,
- mp->gravity,
- mp->nspec2D_bottom_crust_mantle);
-
- // adjoint simulations
- if ( mp->simulation_type == 3 ){
+ if( *FORWARD_OR_ADJOINT == 1 ){
+ compute_coupling_CMB_fluid_kernel<<<grid,threads>>>(mp->d_displ_crust_mantle,
+ mp->d_accel_crust_mantle,
+ mp->d_accel_outer_core,
+ mp->d_ibool_crust_mantle,
+ mp->d_ibelm_bottom_crust_mantle,
+ mp->d_normal_top_outer_core,
+ mp->d_jacobian2D_top_outer_core,
+ mp->d_wgllwgll_xy,
+ mp->d_ibool_outer_core,
+ mp->d_ibelm_top_outer_core,
+ mp->RHO_TOP_OC,
+ mp->minus_g_cmb,
+ mp->gravity,
+ mp->nspec2D_bottom_crust_mantle);
+ }else if( *FORWARD_OR_ADJOINT == 3 ){
+ // adjoint simulations
compute_coupling_CMB_fluid_kernel<<<grid,threads>>>(mp->d_b_displ_crust_mantle,
mp->d_b_accel_crust_mantle,
mp->d_b_accel_outer_core,
@@ -482,7 +486,8 @@
extern "C"
void FC_FUNC_(compute_coupling_icb_fluid_cuda,
- COMPUTE_COUPLING_ICB_FLUID_CUDA)(long* Mesh_pointer_f) {
+ COMPUTE_COUPLING_ICB_FLUID_CUDA)(long* Mesh_pointer_f,
+ int* FORWARD_OR_ADJOINT) {
TRACE("compute_coupling_icb_fluid_cuda");
//double start_time = get_time();
@@ -500,23 +505,23 @@
dim3 threads(5,5,1);
// launches GPU kernel
- compute_coupling_ICB_fluid_kernel<<<grid,threads>>>(mp->d_displ_inner_core,
- mp->d_accel_inner_core,
- mp->d_accel_outer_core,
- mp->d_ibool_inner_core,
- mp->d_ibelm_top_inner_core,
- mp->d_normal_bottom_outer_core,
- mp->d_jacobian2D_bottom_outer_core,
- mp->d_wgllwgll_xy,
- mp->d_ibool_outer_core,
- mp->d_ibelm_bottom_outer_core,
- mp->RHO_BOTTOM_OC,
- mp->minus_g_icb,
- mp->gravity,
- mp->nspec2D_top_inner_core);
-
- // adjoint simulations
- if ( mp->simulation_type == 3 ){
+ if( *FORWARD_OR_ADJOINT == 1 ){
+ compute_coupling_ICB_fluid_kernel<<<grid,threads>>>(mp->d_displ_inner_core,
+ mp->d_accel_inner_core,
+ mp->d_accel_outer_core,
+ mp->d_ibool_inner_core,
+ mp->d_ibelm_top_inner_core,
+ mp->d_normal_bottom_outer_core,
+ mp->d_jacobian2D_bottom_outer_core,
+ mp->d_wgllwgll_xy,
+ mp->d_ibool_outer_core,
+ mp->d_ibelm_bottom_outer_core,
+ mp->RHO_BOTTOM_OC,
+ mp->minus_g_icb,
+ mp->gravity,
+ mp->nspec2D_top_inner_core);
+ }else if( *FORWARD_OR_ADJOINT == 3 ){
+ // adjoint simulations
compute_coupling_ICB_fluid_kernel<<<grid,threads>>>(mp->d_b_displ_inner_core,
mp->d_b_accel_inner_core,
mp->d_b_accel_outer_core,
@@ -625,6 +630,7 @@
if( ( *NCHUNKS_VAL != 6 && mp->absorbing_conditions || (mp->rotation && *exact_mass_matrix_for_rotation)) &&
! *use_lddrk ){
+ // uses corrected mass matrices
if( *FORWARD_OR_ADJOINT == 1 ){
compute_coupling_ocean_cuda_kernel<<<grid,threads>>>(mp->d_accel_crust_mantle,
mp->d_rmassx_crust_mantle,
@@ -646,6 +652,7 @@
mp->d_normal_ocean_load);
}
}else{
+ // uses only rmassz
if( *FORWARD_OR_ADJOINT == 1 ){
compute_coupling_ocean_cuda_kernel<<<grid,threads>>>(mp->d_accel_crust_mantle,
mp->d_rmassz_crust_mantle,
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_outer_core_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_outer_core_cuda.cu 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_outer_core_cuda.cu 2013-08-30 12:45:44 UTC (rev 22746)
@@ -169,9 +169,11 @@
#endif
__shared__ realw s_dummy_loc[NGLL3];
+
__shared__ realw s_temp1[NGLL3];
__shared__ realw s_temp2[NGLL3];
__shared__ realw s_temp3[NGLL3];
+
__shared__ realw sh_hprime_xx[NGLL2];
__shared__ realw sh_hprimewgll_xx[NGLL2];
@@ -461,11 +463,12 @@
realw* d_xix,realw* d_xiy,realw* d_xiz,
realw* d_etax,realw* d_etay,realw* d_etaz,
realw* d_gammax,realw* d_gammay,realw* d_gammaz,
- realw time, realw b_time,
+ realw time,
realw* d_A_array_rotation,
realw* d_B_array_rotation,
realw* d_b_A_array_rotation,
- realw* d_b_B_array_rotation){
+ realw* d_b_B_array_rotation,
+ int FORWARD_OR_ADJOINT){
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("before outer_core kernel Kernel_2");
@@ -497,7 +500,8 @@
// cudaEventCreate(&stop);
// cudaEventRecord( start, 0 );
- Kernel_2_outer_core_impl<<<grid,threads>>>(nb_blocks_to_compute,
+ if( FORWARD_OR_ADJOINT == 1 ){
+ Kernel_2_outer_core_impl<<<grid,threads>>>(nb_blocks_to_compute,
mp->NGLOB_OUTER_CORE,
d_ibool,
mp->d_phase_ispec_inner_outer_core,
@@ -524,8 +528,7 @@
d_A_array_rotation,
d_B_array_rotation,
mp->NSPEC_OUTER_CORE);
-
- if(mp->simulation_type == 3) {
+ }else if( FORWARD_OR_ADJOINT == 3 ){
Kernel_2_outer_core_impl<<<grid,threads>>>(nb_blocks_to_compute,
mp->NGLOB_OUTER_CORE,
d_ibool,
@@ -547,7 +550,7 @@
mp->d_minus_rho_g_over_kappa_fluid,
mp->d_wgll_cube,
mp->rotation,
- b_time,
+ time,
mp->b_two_omega_earth,
mp->b_deltat,
d_b_A_array_rotation,
@@ -580,7 +583,7 @@
COMPUTE_FORCES_OUTER_CORE_CUDA)(long* Mesh_pointer_f,
int* iphase,
realw* time_f,
- realw* b_time_f) {
+ int* FORWARD_OR_ADJOINT) {
TRACE("compute_forces_outer_core_cuda");
@@ -590,7 +593,6 @@
Mesh* mp = (Mesh*)(*Mesh_pointer_f); // get Mesh from fortran integer wrapper
realw time = *time_f;
- realw b_time = *b_time_f;
int num_elements;
@@ -676,11 +678,12 @@
mp->d_gammax_outer_core + color_offset,
mp->d_gammay_outer_core + color_offset,
mp->d_gammaz_outer_core + color_offset,
- time,b_time,
+ time,
mp->d_A_array_rotation + color_offset_nonpadded,
mp->d_B_array_rotation + color_offset_nonpadded,
mp->d_b_A_array_rotation + color_offset_nonpadded,
- mp->d_b_B_array_rotation + color_offset_nonpadded);
+ mp->d_b_B_array_rotation + color_offset_nonpadded,
+ *FORWARD_OR_ADJOINT);
// for padded and aligned arrays
color_offset += nb_blocks_to_compute * NGLL3_PADDED;
@@ -697,11 +700,12 @@
mp->d_xix_outer_core,mp->d_xiy_outer_core,mp->d_xiz_outer_core,
mp->d_etax_outer_core,mp->d_etay_outer_core,mp->d_etaz_outer_core,
mp->d_gammax_outer_core,mp->d_gammay_outer_core,mp->d_gammaz_outer_core,
- time,b_time,
+ time,
mp->d_A_array_rotation,
mp->d_B_array_rotation,
mp->d_b_A_array_rotation,
- mp->d_b_B_array_rotation);
+ mp->d_b_B_array_rotation,
+ *FORWARD_OR_ADJOINT);
}
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu 2013-08-30 12:45:44 UTC (rev 22746)
@@ -51,9 +51,7 @@
realw* wgllwgll,
int* ibool,
realw* vpstore,
- int SIMULATION_TYPE,
int SAVE_FORWARD,
- realw* b_potential_dot_dot_acoustic,
realw* b_absorb_potential) {
int igll = threadIdx.x;
@@ -157,10 +155,7 @@
atomicAdd(&potential_dot_dot_acoustic[iglob],-sn*jacobianw);
// adjoint simulations
- if( SIMULATION_TYPE == 3 ){
- // Sommerfeld condition
- atomicAdd(&b_potential_dot_dot_acoustic[iglob],-b_absorb_potential[INDEX2(NGLL2,igll,iface)]);
- }else if( SIMULATION_TYPE == 1 && SAVE_FORWARD ){
+ if( SAVE_FORWARD ){
// saves boundary values
b_absorb_potential[INDEX2(NGLL2,igll,iface)] = sn*jacobianw;
}
@@ -259,14 +254,6 @@
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
- // adjoint simulations: needs absorbing boundary buffer
- if (mp->simulation_type == 3 && num_abs_boundary_faces > 0 ){
- // copies array to GPU
- print_CUDA_error_if_any(cudaMemcpy(d_b_absorb_potential,absorb_potential,
- NGLL2*num_abs_boundary_faces*sizeof(realw),
- cudaMemcpyHostToDevice),7700);
- }
-
compute_stacey_acoustic_kernel<<<grid,threads>>>(mp->d_veloc_outer_core,
mp->d_accel_outer_core,
interface_type,
@@ -282,13 +269,11 @@
d_wgllwgll,
mp->d_ibool_outer_core,
mp->d_vp_outer_core,
- mp->simulation_type,
mp->save_forward,
- mp->d_b_accel_outer_core,
d_b_absorb_potential);
// adjoint simulations: stores absorbed wavefield part
- if (mp->simulation_type == 1 && mp->save_forward && num_abs_boundary_faces > 0 ){
+ if( mp->save_forward && num_abs_boundary_faces > 0 ){
// copies array to CPU
print_CUDA_error_if_any(cudaMemcpy(absorb_potential,d_b_absorb_potential,
NGLL2*num_abs_boundary_faces*sizeof(realw),
@@ -300,3 +285,206 @@
#endif
}
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void compute_stacey_acoustic_backward_kernel(realw* b_potential_dot_dot_acoustic,
+ realw* b_absorb_potential,
+ int interface_type,
+ int num_abs_boundary_faces,
+ int* abs_boundary_ispec,
+ int* nkmin_xi, int* nkmin_eta,
+ int* njmin, int* njmax,
+ int* nimin, int* nimax,
+ int* ibool) {
+
+ int igll = threadIdx.x;
+ int iface = blockIdx.x + gridDim.x*blockIdx.y;
+
+ int i,j,k,iglob,ispec;
+
+ // don't compute points outside NGLLSQUARE==NGLL2==25
+ // way 2: no further check needed since blocksize = 25
+ if( iface < num_abs_boundary_faces){
+
+ // if(igll<NGLL2 && iface < num_abs_boundary_faces) {
+
+ // "-1" from index values to convert from Fortran-> C indexing
+ ispec = abs_boundary_ispec[iface]-1;
+
+ // determines indices i,j,k depending on absorbing boundary type
+ switch( interface_type ){
+ case 4:
+ // xmin
+ if( nkmin_xi[INDEX2(2,0,iface)] == 0 || njmin[INDEX2(2,0,iface)] == 0 ) return;
+
+ i = 0; // index -1
+ k = (igll/NGLLX);
+ j = (igll-k*NGLLX);
+
+ if( k < nkmin_xi[INDEX2(2,0,iface)]-1 || k > NGLLX-1 ) return;
+ if( j < njmin[INDEX2(2,0,iface)]-1 || j > njmax[INDEX2(2,0,iface)]-1 ) return;
+
+ break;
+
+ case 5:
+ // xmax
+ if( nkmin_xi[INDEX2(2,1,iface)] == 0 || njmin[INDEX2(2,1,iface)] == 0 ) return;
+
+ i = NGLLX-1;
+ k = (igll/NGLLX);
+ j = (igll-k*NGLLX);
+
+ if( k < nkmin_xi[INDEX2(2,1,iface)]-1 || k > NGLLX-1 ) return;
+ if( j < njmin[INDEX2(2,1,iface)]-1 || j > njmax[INDEX2(2,1,iface)]-1 ) return;
+
+ break;
+
+ case 6:
+ // ymin
+ if( nkmin_eta[INDEX2(2,0,iface)] == 0 || nimin[INDEX2(2,0,iface)] == 0 ) return;
+
+ j = 0;
+ k = (igll/NGLLX);
+ i = (igll-k*NGLLX);
+
+ if( k < nkmin_eta[INDEX2(2,0,iface)]-1 || k > NGLLX-1 ) return;
+ if( i < nimin[INDEX2(2,0,iface)]-1 || i > nimax[INDEX2(2,0,iface)]-1 ) return;
+
+ break;
+
+ case 7:
+ // ymax
+ if( nkmin_eta[INDEX2(2,1,iface)] == 0 || nimin[INDEX2(2,1,iface)] == 0 ) return;
+
+ j = NGLLX-1;
+ k = (igll/NGLLX);
+ i = (igll-k*NGLLX);
+
+ if( k < nkmin_eta[INDEX2(2,1,iface)]-1 || k > NGLLX-1 ) return;
+ if( i < nimin[INDEX2(2,1,iface)]-1 || i > nimax[INDEX2(2,1,iface)]-1 ) return;
+
+ break;
+
+ case 8:
+ // zmin
+ k = 0;
+ j = (igll/NGLLX);
+ i = (igll-j*NGLLX);
+
+ if( j < 0 || j > NGLLX-1 ) return;
+ if( i < 0 || i > NGLLX-1 ) return;
+
+ break;
+
+ }
+
+ iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
+
+ // Sommerfeld condition
+ atomicAdd(&b_potential_dot_dot_acoustic[iglob],-b_absorb_potential[INDEX2(NGLL2,igll,iface)]);
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(compute_stacey_acoustic_backward_cuda,
+ COMPUTE_STACEY_ACOUSTIC_BACKWARD_CUDA)(long* Mesh_pointer_f,
+ realw* absorb_potential,
+ int* itype) {
+TRACE("compute_stacey_acoustic_backward_cuda");
+ //double start_time = get_time();
+
+ int num_abs_boundary_faces;
+ int* d_abs_boundary_ispec;
+ realw* d_b_absorb_potential;
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ // absorbing boundary type
+ int interface_type = *itype;
+ switch( interface_type ){
+ case 4:
+ // xmin
+ num_abs_boundary_faces = mp->nspec2D_xmin_outer_core;
+ d_abs_boundary_ispec = mp->d_ibelm_xmin_outer_core;
+ d_b_absorb_potential = mp->d_absorb_xmin_outer_core;
+ break;
+
+ case 5:
+ // xmax
+ num_abs_boundary_faces = mp->nspec2D_xmax_outer_core;
+ d_abs_boundary_ispec = mp->d_ibelm_xmax_outer_core;
+ d_b_absorb_potential = mp->d_absorb_xmax_outer_core;
+ break;
+
+ case 6:
+ // ymin
+ num_abs_boundary_faces = mp->nspec2D_ymin_outer_core;
+ d_abs_boundary_ispec = mp->d_ibelm_ymin_outer_core;
+ d_b_absorb_potential = mp->d_absorb_ymin_outer_core;
+ break;
+
+ case 7:
+ // ymax
+ num_abs_boundary_faces = mp->nspec2D_ymax_outer_core;
+ d_abs_boundary_ispec = mp->d_ibelm_ymax_outer_core;
+ d_b_absorb_potential = mp->d_absorb_ymax_outer_core;
+ break;
+
+ case 8:
+ // zmin
+ num_abs_boundary_faces = mp->nspec2D_zmin_outer_core;
+ d_abs_boundary_ispec = mp->d_ibelm_bottom_outer_core;
+ d_b_absorb_potential = mp->d_absorb_zmin_outer_core;
+ break;
+
+ default:
+ exit_on_cuda_error("compute_stacey_acoustic_cuda: unknown interface type");
+ break;
+ }
+
+ // checks if anything to do
+ if( num_abs_boundary_faces == 0 ) return;
+
+ // way 1: Elapsed time: 4.385948e-03
+ // > NGLLSQUARE==NGLL2==25, but we handle this inside kernel
+ // int blocksize = 32;
+
+ // way 2: Elapsed time: 4.379034e-03
+ // > NGLLSQUARE==NGLL2==25, no further check inside kernel
+ int blocksize = NGLL2;
+
+ int num_blocks_x = num_abs_boundary_faces;
+ int num_blocks_y = 1;
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ // adjoint simulations: needs absorbing boundary buffer
+ if( num_abs_boundary_faces > 0 ){
+ // copies array to GPU
+ print_CUDA_error_if_any(cudaMemcpy(d_b_absorb_potential,absorb_potential,
+ NGLL2*num_abs_boundary_faces*sizeof(realw),
+ cudaMemcpyHostToDevice),7700);
+ }
+
+ compute_stacey_acoustic_backward_kernel<<<grid,threads>>>(mp->d_b_accel_outer_core,
+ d_b_absorb_potential,
+ interface_type,
+ num_abs_boundary_faces,
+ d_abs_boundary_ispec,
+ mp->d_nkmin_xi_outer_core,mp->d_nkmin_eta_outer_core,
+ mp->d_njmin_outer_core,mp->d_njmax_outer_core,
+ mp->d_nimin_outer_core,mp->d_nimax_outer_core,
+ mp->d_ibool_outer_core);
+
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("compute_stacey_acoustic_backward_kernel");
+#endif
+}
+
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_stacey_elastic_cuda.cu 2013-08-30 12:45:44 UTC (rev 22746)
@@ -53,9 +53,7 @@
int* ibool,
realw* rho_vp,
realw* rho_vs,
- int SIMULATION_TYPE,
int SAVE_FORWARD,
- realw* b_accel,
realw* b_absorb_field) {
int igll = threadIdx.x; // tx
@@ -166,12 +164,7 @@
atomicAdd(&accel[iglob*3+1],-ty*jacobianw);
atomicAdd(&accel[iglob*3+2],-tz*jacobianw);
- if(SIMULATION_TYPE == 3) {
- atomicAdd(&b_accel[iglob*3 ],-b_absorb_field[INDEX3(NDIM,NGLL2,0,igll,iface)]);
- atomicAdd(&b_accel[iglob*3+1],-b_absorb_field[INDEX3(NDIM,NGLL2,1,igll,iface)]);
- atomicAdd(&b_accel[iglob*3+2],-b_absorb_field[INDEX3(NDIM,NGLL2,2,igll,iface)]);
- }
- else if(SAVE_FORWARD && SIMULATION_TYPE == 1) {
+ if( SAVE_FORWARD ){
b_absorb_field[INDEX3(NDIM,NGLL2,0,igll,iface)] = tx*jacobianw;
b_absorb_field[INDEX3(NDIM,NGLL2,1,igll,iface)] = ty*jacobianw;
b_absorb_field[INDEX3(NDIM,NGLL2,2,igll,iface)] = tz*jacobianw;
@@ -186,8 +179,8 @@
extern "C"
void FC_FUNC_(compute_stacey_elastic_cuda,
COMPUTE_STACEY_ELASTIC_CUDA)(long* Mesh_pointer_f,
- realw* absorb_field,
- int* itype) {
+ realw* absorb_field,
+ int* itype) {
TRACE("compute_stacey_elastic_cuda");
@@ -268,13 +261,6 @@
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
- // adjoint simulations: needs absorbing boundary buffer
- if(mp->simulation_type == 3 && num_abs_boundary_faces > 0) {
- // copies array to GPU
- print_CUDA_error_if_any(cudaMemcpy(d_b_absorb_field,absorb_field,
- NDIM*NGLL2*num_abs_boundary_faces*sizeof(realw),cudaMemcpyHostToDevice),7700);
- }
-
// absorbing boundary contributions
compute_stacey_elastic_kernel<<<grid,threads>>>(mp->d_veloc_crust_mantle,
mp->d_accel_crust_mantle,
@@ -293,14 +279,12 @@
mp->d_ibool_crust_mantle,
mp->d_rho_vp_crust_mantle,
mp->d_rho_vs_crust_mantle,
- mp->simulation_type,
mp->save_forward,
- mp->d_b_accel_crust_mantle,
d_b_absorb_field);
// adjoint simulations: stores absorbed wavefield part
- if(mp->simulation_type == 1 && mp->save_forward && num_abs_boundary_faces > 0 ) {
+ if(mp->save_forward && num_abs_boundary_faces > 0 ) {
// copies array to CPU
print_CUDA_error_if_any(cudaMemcpy(absorb_field,d_b_absorb_field,
NDIM*NGLL2*num_abs_boundary_faces*sizeof(realw),cudaMemcpyDeviceToHost),7701);
@@ -311,3 +295,197 @@
#endif
}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+// backward/reconstructed wavefields
+
+/* ----------------------------------------------------------------------------------------------- */
+
+__global__ void compute_stacey_elastic_backward_kernel(realw* b_accel,
+ realw* b_absorb_field,
+ int interface_type,
+ int num_abs_boundary_faces,
+ int* abs_boundary_ispec,
+ int* nkmin_xi, int* nkmin_eta,
+ int* njmin, int* njmax,
+ int* nimin, int* nimax,
+ int* ibool) {
+
+ int igll = threadIdx.x; // tx
+ int iface = blockIdx.x + gridDim.x*blockIdx.y; // bx
+
+ int i,j,k,iglob,ispec;
+
+ // don't compute surface faces outside of range
+ // and don't compute points outside NGLLSQUARE==NGLL2==25
+ //if(igll < NGLL2 && iface < num_abs_boundary_faces) {
+
+ // way 2: only check face, no further check needed since blocksize = 25
+ if( iface < num_abs_boundary_faces){
+
+ // "-1" from index values to convert from Fortran-> C indexing
+ ispec = abs_boundary_ispec[iface]-1;
+
+ // determines indices i,j,k depending on absorbing boundary type
+ switch( interface_type ){
+ case 0:
+ // xmin
+ if( nkmin_xi[INDEX2(2,0,iface)] == 0 || njmin[INDEX2(2,0,iface)] == 0 ) return;
+
+ i = 0; // index -1
+ k = (igll/NGLLX);
+ j = (igll-k*NGLLX);
+
+ if( k < nkmin_xi[INDEX2(2,0,iface)]-1 || k > NGLLX-1 ) return;
+ if( j < njmin[INDEX2(2,0,iface)]-1 || j > NGLLX-1 ) return;
+
+ break;
+
+ case 1:
+ // xmax
+ if( nkmin_xi[INDEX2(2,1,iface)] == 0 || njmin[INDEX2(2,1,iface)] == 0 ) return;
+
+ i = NGLLX-1;
+ k = (igll/NGLLX);
+ j = (igll-k*NGLLX);
+
+ if( k < nkmin_xi[INDEX2(2,1,iface)]-1 || k > NGLLX-1 ) return;
+ if( j < njmin[INDEX2(2,1,iface)]-1 || j > njmax[INDEX2(2,1,iface)]-1 ) return;
+
+ break;
+
+ case 2:
+ // ymin
+ if( nkmin_eta[INDEX2(2,0,iface)] == 0 || nimin[INDEX2(2,0,iface)] == 0 ) return;
+
+ j = 0;
+ k = (igll/NGLLX);
+ i = (igll-k*NGLLX);
+
+ if( k < nkmin_eta[INDEX2(2,0,iface)]-1 || k > NGLLX-1 ) return;
+ if( i < nimin[INDEX2(2,0,iface)]-1 || i > nimax[INDEX2(2,0,iface)]-1 ) return;
+
+ break;
+
+ case 3:
+ // ymax
+ if( nkmin_eta[INDEX2(2,1,iface)] == 0 || nimin[INDEX2(2,1,iface)] == 0 ) return;
+
+ j = NGLLX-1;
+ k = (igll/NGLLX);
+ i = (igll-k*NGLLX);
+
+ if( k < nkmin_eta[INDEX2(2,1,iface)]-1 || k > NGLLX-1 ) return;
+ if( i < nimin[INDEX2(2,1,iface)]-1 || i > nimax[INDEX2(2,1,iface)]-1 ) return;
+
+ break;
+ }
+
+ iglob = ibool[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)]-1;
+
+ atomicAdd(&b_accel[iglob*3 ],-b_absorb_field[INDEX3(NDIM,NGLL2,0,igll,iface)]);
+ atomicAdd(&b_accel[iglob*3+1],-b_absorb_field[INDEX3(NDIM,NGLL2,1,igll,iface)]);
+ atomicAdd(&b_accel[iglob*3+2],-b_absorb_field[INDEX3(NDIM,NGLL2,2,igll,iface)]);
+
+ } // num_abs_boundary_faces
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+extern "C"
+void FC_FUNC_(compute_stacey_elastic_backward_cuda,
+ COMPUTE_STACEY_ELASTIC_BACKWARD_CUDA)(long* Mesh_pointer_f,
+ realw* absorb_field,
+ int* itype) {
+
+TRACE("compute_stacey_elastic_backward_cuda");
+
+ int num_abs_boundary_faces;
+ int* d_abs_boundary_ispec;
+ realw* d_b_absorb_field;
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ // absorbing boundary type
+ int interface_type = *itype;
+ switch( interface_type ){
+ case 0:
+ // xmin
+ num_abs_boundary_faces = mp->nspec2D_xmin_crust_mantle;
+ d_abs_boundary_ispec = mp->d_ibelm_xmin_crust_mantle;
+ d_b_absorb_field = mp->d_absorb_xmin_crust_mantle;
+ break;
+
+ case 1:
+ // xmax
+ num_abs_boundary_faces = mp->nspec2D_xmax_crust_mantle;
+ d_abs_boundary_ispec = mp->d_ibelm_xmax_crust_mantle;
+ d_b_absorb_field = mp->d_absorb_xmax_crust_mantle;
+ break;
+
+ case 2:
+ // ymin
+ num_abs_boundary_faces = mp->nspec2D_ymin_crust_mantle;
+ d_abs_boundary_ispec = mp->d_ibelm_ymin_crust_mantle;
+ d_b_absorb_field = mp->d_absorb_ymin_crust_mantle;
+ break;
+
+ case 3:
+ // ymax
+ num_abs_boundary_faces = mp->nspec2D_ymax_crust_mantle;
+ d_abs_boundary_ispec = mp->d_ibelm_ymax_crust_mantle;
+ d_b_absorb_field = mp->d_absorb_ymax_crust_mantle;
+ break;
+
+ default:
+ exit_on_cuda_error("compute_stacey_elastic_cuda: unknown interface type");
+ break;
+ }
+
+ // checks if anything to do
+ if( num_abs_boundary_faces == 0 ) return;
+
+ // way 1
+ // > NGLLSQUARE==NGLL2==25, but we handle this inside kernel
+ //int blocksize = 32;
+
+  // way 2: seems slightly faster
+ // > NGLLSQUARE==NGLL2==25, no further check inside kernel
+ int blocksize = NGLL2;
+
+ int num_blocks_x = num_abs_boundary_faces;
+ int num_blocks_y = 1;
+ while(num_blocks_x > MAXIMUM_GRID_DIM) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(blocksize,1,1);
+
+ // adjoint simulations: needs absorbing boundary buffer
+ if( num_abs_boundary_faces > 0 ){
+ // copies array to GPU
+ print_CUDA_error_if_any(cudaMemcpy(d_b_absorb_field,absorb_field,
+ NDIM*NGLL2*num_abs_boundary_faces*sizeof(realw),cudaMemcpyHostToDevice),7700);
+ }
+
+ // absorbing boundary contributions
+ compute_stacey_elastic_backward_kernel<<<grid,threads>>>(mp->d_b_accel_crust_mantle,
+ d_b_absorb_field,
+ interface_type,
+ num_abs_boundary_faces,
+ d_abs_boundary_ispec,
+ mp->d_nkmin_xi_crust_mantle,mp->d_nkmin_eta_crust_mantle,
+ mp->d_njmin_crust_mantle,mp->d_njmax_crust_mantle,
+ mp->d_nimin_crust_mantle,mp->d_nimax_crust_mantle,
+ mp->d_ibool_crust_mantle);
+
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("compute_stacey_elastic_backward_cuda");
+#endif
+}
+
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu 2013-08-30 12:45:44 UTC (rev 22746)
@@ -72,9 +72,7 @@
realw* deltat_F,
realw* deltatsqover2_F,
realw* deltatover2_F,
- realw* b_deltat_F,
- realw* b_deltatsqover2_F,
- realw* b_deltatover2_F) {
+ int* FORWARD_OR_ADJOINT) {
TRACE("it_update_displacement_ic_cuda");
@@ -99,22 +97,18 @@
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
- //launch kernel
- UpdateDispVeloc_kernel<<<grid,threads>>>(mp->d_displ_inner_core,
- mp->d_veloc_inner_core,
- mp->d_accel_inner_core,
- size,deltat,deltatsqover2,deltatover2);
-
- // kernel for backward fields
- if(mp->simulation_type == 3) {
- realw b_deltat = *b_deltat_F;
- realw b_deltatsqover2 = *b_deltatsqover2_F;
- realw b_deltatover2 = *b_deltatover2_F;
-
+ if( *FORWARD_OR_ADJOINT == 1 ){
+ //launch kernel
+ UpdateDispVeloc_kernel<<<grid,threads>>>(mp->d_displ_inner_core,
+ mp->d_veloc_inner_core,
+ mp->d_accel_inner_core,
+ size,deltat,deltatsqover2,deltatover2);
+ }else if( *FORWARD_OR_ADJOINT == 3 ){
+ // kernel for backward fields
UpdateDispVeloc_kernel<<<grid,threads>>>(mp->d_b_displ_inner_core,
mp->d_b_veloc_inner_core,
mp->d_b_accel_inner_core,
- size,b_deltat,b_deltatsqover2,b_deltatover2);
+ size,deltat,deltatsqover2,deltatover2);
}
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
@@ -135,9 +129,7 @@
realw* deltat_F,
realw* deltatsqover2_F,
realw* deltatover2_F,
- realw* b_deltat_F,
- realw* b_deltatsqover2_F,
- realw* b_deltatover2_F) {
+ int* FORWARD_OR_ADJOINT) {
TRACE("it_update_displacement_cm_cuda");
@@ -162,22 +154,18 @@
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
- //launch kernel
- UpdateDispVeloc_kernel<<<grid,threads>>>(mp->d_displ_crust_mantle,
- mp->d_veloc_crust_mantle,
- mp->d_accel_crust_mantle,
- size,deltat,deltatsqover2,deltatover2);
-
- // kernel for backward fields
- if(mp->simulation_type == 3) {
- realw b_deltat = *b_deltat_F;
- realw b_deltatsqover2 = *b_deltatsqover2_F;
- realw b_deltatover2 = *b_deltatover2_F;
-
+ if( *FORWARD_OR_ADJOINT == 1 ){
+ //launch kernel
+ UpdateDispVeloc_kernel<<<grid,threads>>>(mp->d_displ_crust_mantle,
+ mp->d_veloc_crust_mantle,
+ mp->d_accel_crust_mantle,
+ size,deltat,deltatsqover2,deltatover2);
+ }else if( *FORWARD_OR_ADJOINT == 3 ){
+ // kernel for backward fields
UpdateDispVeloc_kernel<<<grid,threads>>>(mp->d_b_displ_crust_mantle,
mp->d_b_veloc_crust_mantle,
mp->d_b_accel_crust_mantle,
- size,b_deltat,b_deltatsqover2,b_deltatover2);
+ size,deltat,deltatsqover2,deltatover2);
}
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
@@ -226,12 +214,10 @@
extern "C"
void FC_FUNC_(it_update_displacement_oc_cuda,
IT_UPDATE_DISPLACEMENT_OC_cuda)(long* Mesh_pointer_f,
- realw* deltat_F,
- realw* deltatsqover2_F,
- realw* deltatover2_F,
- realw* b_deltat_F,
- realw* b_deltatsqover2_F,
- realw* b_deltatover2_F) {
+ realw* deltat_F,
+ realw* deltatsqover2_F,
+ realw* deltatover2_F,
+ int* FORWARD_OR_ADJOINT) {
TRACE("it_update_displacement_oc_cuda");
@@ -256,21 +242,17 @@
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
- //launch kernel
- UpdatePotential_kernel<<<grid,threads>>>(mp->d_displ_outer_core,
- mp->d_veloc_outer_core,
- mp->d_accel_outer_core,
- size,deltat,deltatsqover2,deltatover2);
-
- if(mp->simulation_type == 3) {
- realw b_deltat = *b_deltat_F;
- realw b_deltatsqover2 = *b_deltatsqover2_F;
- realw b_deltatover2 = *b_deltatover2_F;
-
+ if( *FORWARD_OR_ADJOINT == 1 ){
+ //launch kernel
+ UpdatePotential_kernel<<<grid,threads>>>(mp->d_displ_outer_core,
+ mp->d_veloc_outer_core,
+ mp->d_accel_outer_core,
+ size,deltat,deltatsqover2,deltatover2);
+  }else if( *FORWARD_OR_ADJOINT == 3 ){
UpdatePotential_kernel<<<grid,threads>>>(mp->d_b_displ_outer_core,
mp->d_b_veloc_outer_core,
mp->d_b_accel_outer_core,
- size,b_deltat,b_deltatsqover2,b_deltatover2);
+ size,deltat,deltatsqover2,deltatover2);
}
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
@@ -352,20 +334,16 @@
/* ----------------------------------------------------------------------------------------------- */
extern "C"
-void FC_FUNC_(kernel_3_a_cuda,
- KERNEL_3_A_CUDA)(long* Mesh_pointer,
- realw* deltatover2_F,
- int* SIMULATION_TYPE_f,
- realw* b_deltatover2_F,
- int* NCHUNKS_VAL) {
- TRACE("kernel_3_a_cuda");
+void FC_FUNC_(update_accel_3_a_cuda,
+ UPDATE_ACCEL_3_A_CUDA)(long* Mesh_pointer,
+ realw* deltatover2_F,
+ int* NCHUNKS_VAL,
+ int* FORWARD_OR_ADJOINT) {
+ TRACE("update_accel_3_a_cuda");
Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper
- int SIMULATION_TYPE = *SIMULATION_TYPE_f;
-
realw deltatover2 = *deltatover2_F;
- realw b_deltatover2 = *b_deltatover2_F;
int blocksize = BLOCKSIZE_KERNEL3;
int size_padded = ((int)ceil(((double)mp->NGLOB_CRUST_MANTLE)/((double)blocksize)))*blocksize;
@@ -386,40 +364,42 @@
// updates both, accel and veloc
if( *NCHUNKS_VAL != 6 && mp->absorbing_conditions){
- kernel_3_cuda_device<<< grid, threads>>>(mp->d_veloc_crust_mantle,
- mp->d_accel_crust_mantle,
- mp->NGLOB_CRUST_MANTLE,
- deltatover2,
- mp->two_omega_earth,
- mp->d_rmassx_crust_mantle,
- mp->d_rmassy_crust_mantle,
- mp->d_rmassz_crust_mantle);
-
- if(SIMULATION_TYPE == 3){
+ // uses corrected mass matrices rmassx,rmassy,rmassz
+ if( *FORWARD_OR_ADJOINT == 1 ){
+ kernel_3_cuda_device<<< grid, threads>>>(mp->d_veloc_crust_mantle,
+ mp->d_accel_crust_mantle,
+ mp->NGLOB_CRUST_MANTLE,
+ deltatover2,
+ mp->two_omega_earth,
+ mp->d_rmassx_crust_mantle,
+ mp->d_rmassy_crust_mantle,
+ mp->d_rmassz_crust_mantle);
+ }else if( *FORWARD_OR_ADJOINT == 3 ){
kernel_3_cuda_device<<< grid, threads>>>(mp->d_b_veloc_crust_mantle,
mp->d_b_accel_crust_mantle,
mp->NGLOB_CRUST_MANTLE,
- b_deltatover2,
+ deltatover2,
mp->b_two_omega_earth,
mp->d_rmassx_crust_mantle,
mp->d_rmassy_crust_mantle,
mp->d_rmassz_crust_mantle);
}
}else{
- kernel_3_cuda_device<<< grid, threads>>>(mp->d_veloc_crust_mantle,
- mp->d_accel_crust_mantle,
- mp->NGLOB_CRUST_MANTLE,
- deltatover2,
- mp->two_omega_earth,
- mp->d_rmassz_crust_mantle,
- mp->d_rmassz_crust_mantle,
- mp->d_rmassz_crust_mantle);
-
- if(SIMULATION_TYPE == 3){
+ // uses only rmassz
+ if( *FORWARD_OR_ADJOINT == 1 ){
+ kernel_3_cuda_device<<< grid, threads>>>(mp->d_veloc_crust_mantle,
+ mp->d_accel_crust_mantle,
+ mp->NGLOB_CRUST_MANTLE,
+ deltatover2,
+ mp->two_omega_earth,
+ mp->d_rmassz_crust_mantle,
+ mp->d_rmassz_crust_mantle,
+ mp->d_rmassz_crust_mantle);
+ }else if( *FORWARD_OR_ADJOINT == 3 ){
kernel_3_cuda_device<<< grid, threads>>>(mp->d_b_veloc_crust_mantle,
mp->d_b_accel_crust_mantle,
mp->NGLOB_CRUST_MANTLE,
- b_deltatover2,
+ deltatover2,
mp->b_two_omega_earth,
mp->d_rmassz_crust_mantle,
mp->d_rmassz_crust_mantle,
@@ -431,15 +411,16 @@
// updates only accel
if( *NCHUNKS_VAL != 6 && mp->absorbing_conditions){
- kernel_3_accel_cuda_device<<< grid, threads>>>(mp->d_accel_crust_mantle,
- mp->d_veloc_crust_mantle,
- mp->NGLOB_CRUST_MANTLE,
- mp->two_omega_earth,
- mp->d_rmassx_crust_mantle,
- mp->d_rmassy_crust_mantle,
- mp->d_rmassz_crust_mantle);
-
- if(SIMULATION_TYPE == 3) {
+ // uses corrected mass matrices
+ if( *FORWARD_OR_ADJOINT == 1 ){
+ kernel_3_accel_cuda_device<<< grid, threads>>>(mp->d_accel_crust_mantle,
+ mp->d_veloc_crust_mantle,
+ mp->NGLOB_CRUST_MANTLE,
+ mp->two_omega_earth,
+ mp->d_rmassx_crust_mantle,
+ mp->d_rmassy_crust_mantle,
+ mp->d_rmassz_crust_mantle);
+ }else if( *FORWARD_OR_ADJOINT == 3 ){
kernel_3_accel_cuda_device<<< grid, threads>>>(mp->d_b_accel_crust_mantle,
mp->d_b_veloc_crust_mantle,
mp->NGLOB_CRUST_MANTLE,
@@ -449,15 +430,16 @@
mp->d_rmassz_crust_mantle);
}
}else{
- kernel_3_accel_cuda_device<<< grid, threads>>>(mp->d_accel_crust_mantle,
- mp->d_veloc_crust_mantle,
- mp->NGLOB_CRUST_MANTLE,
- mp->two_omega_earth,
- mp->d_rmassz_crust_mantle,
- mp->d_rmassz_crust_mantle,
- mp->d_rmassz_crust_mantle);
-
- if(SIMULATION_TYPE == 3) {
+ // uses only rmassz
+ if( *FORWARD_OR_ADJOINT == 1 ){
+ kernel_3_accel_cuda_device<<< grid, threads>>>(mp->d_accel_crust_mantle,
+ mp->d_veloc_crust_mantle,
+ mp->NGLOB_CRUST_MANTLE,
+ mp->two_omega_earth,
+ mp->d_rmassz_crust_mantle,
+ mp->d_rmassz_crust_mantle,
+ mp->d_rmassz_crust_mantle);
+ }else if( *FORWARD_OR_ADJOINT == 3 ){
kernel_3_accel_cuda_device<<< grid, threads>>>(mp->d_b_accel_crust_mantle,
mp->d_b_veloc_crust_mantle,
mp->NGLOB_CRUST_MANTLE,
@@ -471,26 +453,23 @@
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
//printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
- exit_on_cuda_error("after kernel_3_a");
+ exit_on_cuda_error("after update_accel_3_a");
#endif
}
/* ----------------------------------------------------------------------------------------------- */
extern "C"
-void FC_FUNC_(kernel_3_b_cuda,
- KERNEL_3_B_CUDA)(long* Mesh_pointer,
- realw* deltatover2_F,
- int* SIMULATION_TYPE_f,
- realw* b_deltatover2_F) {
- TRACE("kernel_3_b_cuda");
+void FC_FUNC_(update_veloc_3_b_cuda,
+ UPDATE_VELOC_3_B_CUDA)(long* Mesh_pointer,
+ realw* deltatover2_F,
+ int* FORWARD_OR_ADJOINT) {
+ TRACE("update_veloc_3_b_cuda");
int size_padded,num_blocks_x,num_blocks_y;
Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper
- int SIMULATION_TYPE = *SIMULATION_TYPE_f;
realw deltatover2 = *deltatover2_F;
- realw b_deltatover2 = *b_deltatover2_F;
int blocksize = BLOCKSIZE_KERNEL3;
@@ -508,16 +487,16 @@
dim3 threads1(blocksize,1,1);
// updates only veloc at this point
- kernel_3_veloc_cuda_device<<< grid1, threads1>>>(mp->d_veloc_crust_mantle,
- mp->d_accel_crust_mantle,
- mp->NGLOB_CRUST_MANTLE,
- deltatover2);
-
- if(SIMULATION_TYPE == 3) {
+ if( *FORWARD_OR_ADJOINT == 1 ){
+ kernel_3_veloc_cuda_device<<< grid1, threads1>>>(mp->d_veloc_crust_mantle,
+ mp->d_accel_crust_mantle,
+ mp->NGLOB_CRUST_MANTLE,
+ deltatover2);
+ }else if( *FORWARD_OR_ADJOINT == 3 ){
kernel_3_veloc_cuda_device<<< grid1, threads1>>>(mp->d_b_veloc_crust_mantle,
mp->d_b_accel_crust_mantle,
mp->NGLOB_CRUST_MANTLE,
- b_deltatover2);
+ deltatover2);
}
}
@@ -533,30 +512,29 @@
dim3 threads(blocksize,1,1);
// updates both, accel and veloc
- kernel_3_cuda_device<<< grid, threads>>>(mp->d_veloc_inner_core,
- mp->d_accel_inner_core,
- mp->NGLOB_INNER_CORE,
- deltatover2,
- mp->two_omega_earth,
- mp->d_rmass_inner_core,
- mp->d_rmass_inner_core,
- mp->d_rmass_inner_core);
-
- if(SIMULATION_TYPE == 3) {
+ if( *FORWARD_OR_ADJOINT == 1 ){
+ kernel_3_cuda_device<<< grid, threads>>>(mp->d_veloc_inner_core,
+ mp->d_accel_inner_core,
+ mp->NGLOB_INNER_CORE,
+ deltatover2,
+ mp->two_omega_earth,
+ mp->d_rmass_inner_core,
+ mp->d_rmass_inner_core,
+ mp->d_rmass_inner_core);
+ }else if( *FORWARD_OR_ADJOINT == 3 ){
kernel_3_cuda_device<<< grid, threads>>>(mp->d_b_veloc_inner_core,
mp->d_b_accel_inner_core,
mp->NGLOB_INNER_CORE,
- b_deltatover2,
+ deltatover2,
mp->b_two_omega_earth,
mp->d_rmass_inner_core,
mp->d_rmass_inner_core,
mp->d_rmass_inner_core);
}
-
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
//printf("checking updatedispl_kernel launch...with %dx%d blocks\n",num_blocks_x,num_blocks_y);
- exit_on_cuda_error("after kernel_3_b");
+ exit_on_cuda_error("after update_veloc_3_b");
#endif
}
@@ -593,16 +571,13 @@
void FC_FUNC_(kernel_3_outer_core_cuda,
KERNEL_3_OUTER_CORE_CUDA)(long* Mesh_pointer,
realw* deltatover2_F,
- int* SIMULATION_TYPE_f,
- realw* b_deltatover2_F) {
+ int* FORWARD_OR_ADJOINT) {
TRACE("kernel_3_outer_core_cuda");
Mesh* mp = (Mesh*)(*Mesh_pointer); // get Mesh from fortran integer wrapper
- int SIMULATION_TYPE = *SIMULATION_TYPE_f;
realw deltatover2 = *deltatover2_F;
- realw b_deltatover2 = *b_deltatover2_F;
int blocksize = BLOCKSIZE_KERNEL3;
@@ -616,16 +591,16 @@
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
- kernel_3_outer_core_cuda_device<<< grid, threads>>>(mp->d_veloc_outer_core,
- mp->d_accel_outer_core,
- mp->NGLOB_OUTER_CORE,
- deltatover2,mp->d_rmass_outer_core);
-
- if(SIMULATION_TYPE == 3) {
+ if( *FORWARD_OR_ADJOINT == 1 ){
+ kernel_3_outer_core_cuda_device<<< grid, threads>>>(mp->d_veloc_outer_core,
+ mp->d_accel_outer_core,
+ mp->NGLOB_OUTER_CORE,
+ deltatover2,mp->d_rmass_outer_core);
+ }else if( *FORWARD_OR_ADJOINT == 3){
kernel_3_outer_core_cuda_device<<< grid, threads>>>(mp->d_b_veloc_outer_core,
mp->d_b_accel_outer_core,
mp->NGLOB_OUTER_CORE,
- b_deltatover2,mp->d_rmass_outer_core);
+ deltatover2,mp->d_rmass_outer_core);
}
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c 2013-08-30 12:45:44 UTC (rev 22746)
@@ -155,16 +155,20 @@
//
void FC_FUNC_(compute_coupling_fluid_cmb_cuda,
- COMPUTE_COUPLING_FLUID_CMB_CUDA)(long* Mesh_pointer_f) {}
+ COMPUTE_COUPLING_FLUID_CMB_CUDA)(long* Mesh_pointer_f,
+ int* FORWARD_OR_ADJOINT) {}
void FC_FUNC_(compute_coupling_fluid_icb_cuda,
- COMPUTE_COUPLING_FLUID_ICB_CUDA)(long* Mesh_pointer_f) {}
+ COMPUTE_COUPLING_FLUID_ICB_CUDA)(long* Mesh_pointer_f,
+ int* FORWARD_OR_ADJOINT) {}
void FC_FUNC_(compute_coupling_cmb_fluid_cuda,
- COMPUTE_COUPLING_CMB_FLUID_CUDA)(long* Mesh_pointer_f) {}
+ COMPUTE_COUPLING_CMB_FLUID_CUDA)(long* Mesh_pointer_f,
+ int* FORWARD_OR_ADJOINT) {}
void FC_FUNC_(compute_coupling_icb_fluid_cuda,
- COMPUTE_COUPLING_ICB_FLUID_CUDA)(long* Mesh_pointer_f) {}
+ COMPUTE_COUPLING_ICB_FLUID_CUDA)(long* Mesh_pointer_f,
+ int* FORWARD_OR_ADJOINT) {}
void FC_FUNC_(compute_coupling_ocean_cuda,
COMPUTE_COUPLING_OCEAN_CUDA)(long* Mesh_pointer_f,
@@ -200,7 +204,7 @@
COMPUTE_FORCES_OUTER_CORE_CUDA)(long* Mesh_pointer_f,
int* iphase,
realw* time_f,
- realw* b_time_f) {}
+ int* FORWARD_OR_ADJOINT) {}
//
@@ -235,17 +239,27 @@
realw* absorb_potential,
int* itype) {}
+void FC_FUNC_(compute_stacey_acoustic_backward_cuda,
+ COMPUTE_STACEY_ACOUSTIC_BACKWARD_CUDA)(long* Mesh_pointer_f,
+ realw* absorb_potential,
+ int* itype) {}
+
//
// src/cuda/compute_stacey_elastic_cuda.cu
//
void FC_FUNC_(compute_stacey_elastic_cuda,
COMPUTE_STACEY_ELASTIC_CUDA)(long* Mesh_pointer_f,
- realw* absorb_field,
- int* itype) {}
+ realw* absorb_field,
+ int* itype) {}
+void FC_FUNC_(compute_stacey_elastic_backward_cuda,
+ COMPUTE_STACEY_ELASTIC_BACKWARD_CUDA)(long* Mesh_pointer_f,
+ realw* absorb_field,
+ int* itype) {}
+
//
// src/cuda/initialize_cuda.cu
//
@@ -266,46 +280,37 @@
realw* deltat_F,
realw* deltatsqover2_F,
realw* deltatover2_F,
- realw* b_deltat_F,
- realw* b_deltatsqover2_F,
- realw* b_deltatover2_F) {}
+ int* FORWARD_OR_ADJOINT) {}
void FC_FUNC_(it_update_displacement_cm_cuda,
IT_UPDATE_DISPLACMENT_CM_CUDA)(long* Mesh_pointer_f,
realw* deltat_F,
realw* deltatsqover2_F,
realw* deltatover2_F,
- realw* b_deltat_F,
- realw* b_deltatsqover2_F,
- realw* b_deltatover2_F) {}
+ int* FORWARD_OR_ADJOINT) {}
void FC_FUNC_(it_update_displacement_oc_cuda,
IT_UPDATE_DISPLACEMENT_OC_cuda)(long* Mesh_pointer_f,
- realw* deltat_F,
- realw* deltatsqover2_F,
- realw* deltatover2_F,
- realw* b_deltat_F,
- realw* b_deltatsqover2_F,
- realw* b_deltatover2_F) {}
+ realw* deltat_F,
+ realw* deltatsqover2_F,
+ realw* deltatover2_F,
+ int* FORWARD_OR_ADJOINT) {}
-void FC_FUNC_(kernel_3_a_cuda,
- KERNEL_3_A_CUDA)(long* Mesh_pointer,
- realw* deltatover2_F,
- int* SIMULATION_TYPE_f,
- realw* b_deltatover2_F,
- int* NCHUNKS_VAL) {}
+void FC_FUNC_(update_accel_3_a_cuda,
+ UPDATE_ACCEL_3_A_CUDA)(long* Mesh_pointer,
+ realw* deltatover2_F,
+ int* NCHUNKS_VAL,
+ int* FORWARD_OR_ADJOINT) {}
-void FC_FUNC_(kernel_3_b_cuda,
- KERNEL_3_B_CUDA)(long* Mesh_pointer,
- realw* deltatover2_F,
- int* SIMULATION_TYPE_f,
- realw* b_deltatover2_F) {}
+void FC_FUNC_(update_veloc_3_b_cuda,
+ UPDATE_VELOC_3_B_CUDA)(long* Mesh_pointer,
+ realw* deltatover2_F,
+ int* FORWARD_OR_ADJOINT) {}
void FC_FUNC_(kernel_3_outer_core_cuda,
KERNEL_3_OUTER_CORE_CUDA)(long* Mesh_pointer,
realw* deltatover2_F,
- int* SIMULATION_TYPE_f,
- realw* b_deltatover2_F) {}
+ int* FORWARD_OR_ADJOINT) {}
//
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/parallel.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/parallel.f90 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/parallel.f90 2013-08-30 12:45:44 UTC (rev 22746)
@@ -591,6 +591,28 @@
!
+ subroutine recv_singlel(recvbuf, dest, recvtag)
+
+ use mpi
+
+ implicit none
+
+ integer :: dest,recvtag
+ logical :: recvbuf
+
+ ! MPI status of messages to be received
+ integer :: msg_status(MPI_STATUS_SIZE)
+ integer :: ier
+
+ call MPI_RECV(recvbuf,1,MPI_LOGICAL,dest,recvtag,MPI_COMM_WORLD,msg_status,ier)
+
+ end subroutine recv_singlel
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
subroutine recv_i(recvbuf, recvcount, dest, recvtag)
use mpi
@@ -672,6 +694,7 @@
integer :: dest,sendtag
integer :: sendcount
integer,dimension(sendcount):: sendbuf
+
integer :: ier
call MPI_SEND(sendbuf,sendcount,MPI_INTEGER,dest,sendtag,MPI_COMM_WORLD,ier)
@@ -690,13 +713,32 @@
integer :: dest,sendtag
integer :: sendbuf
+
integer :: ier
call MPI_SEND(sendbuf,1,MPI_INTEGER,dest,sendtag,MPI_COMM_WORLD,ier)
end subroutine send_singlei
+!
+!-------------------------------------------------------------------------------------------------
+!
+ subroutine send_singlel(sendbuf, dest, sendtag)
+
+ use mpi
+
+ implicit none
+
+ integer :: dest,sendtag
+ logical :: sendbuf
+
+ integer :: ier
+
+ call MPI_SEND(sendbuf,1,MPI_LOGICAL,dest,sendtag,MPI_COMM_WORLD,ier)
+
+ end subroutine send_singlel
+
!
!-------------------------------------------------------------------------------------------------
!
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/read_parameter_file.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/read_parameter_file.f90 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/read_parameter_file.f90 2013-08-30 12:45:44 UTC (rev 22746)
@@ -114,6 +114,8 @@
if (ierr /= 0) stop 'an error occurred while reading the parameter file: MODEL'
call read_value_double_precision(RECORD_LENGTH_IN_MINUTES, 'solver.RECORD_LENGTH_IN_MINUTES', ierr)
if (ierr /= 0) stop 'an error occurred while reading the parameter file: RECORD_LENGTH_IN_MINUTES'
+
+ ! attenuation parameters
call read_value_logical(ATTENUATION_1D_WITH_3D_STORAGE, 'solver.ATTENUATION_1D_WITH_3D_STORAGE', ierr)
if (ierr /= 0) stop 'an error occurred while reading the parameter file: ATTENUATION_1D_WITH_3D_STORAGE'
call read_value_logical(PARTIAL_PHYS_DISPERSION_ONLY, 'solver.PARTIAL_PHYS_DISPERSION_ONLY', ierr)
@@ -123,12 +125,10 @@
call read_value_integer(NT_DUMP_ATTENUATION, 'solver.NT_DUMP_ATTENUATION', ierr)
if (ierr /= 0) stop 'an error occurred while reading the parameter file: NT_DUMP_ATTENUATION'
+ ! mass matrix corrections
call read_value_logical(EXACT_MASS_MATRIX_FOR_ROTATION, 'solver.EXACT_MASS_MATRIX_FOR_ROTATION', ierr)
if (ierr /= 0) stop 'an error occurred while reading the parameter file: EXACT_MASS_MATRIX_FOR_ROTATION'
- ! ignore EXACT_MASS_MATRIX_FOR_ROTATION if rotation is not included in the simulations
- if(.not. ROTATION) EXACT_MASS_MATRIX_FOR_ROTATION = .false.
-
! low-memory runge-kutta time scheme
call read_value_logical(USE_LDDRK, 'solver.USE_LDDRK', ierr)
if (ierr /= 0) stop 'an error occurred while reading the parameter file: USE_LDDRK'
@@ -175,11 +175,13 @@
call read_value_integer(NUMBER_OF_THIS_RUN, 'solver.NUMBER_OF_THIS_RUN', ierr)
if (ierr /= 0) stop 'an error occurred while reading the parameter file: NUMBER_OF_THIS_RUN'
+ ! data file output directories
call read_value_string(LOCAL_PATH, 'LOCAL_PATH', ierr)
if (ierr /= 0) stop 'an error occurred while reading the parameter file: LOCAL_PATH'
call read_value_string(LOCAL_TMP_PATH, 'LOCAL_TMP_PATH', ierr)
if (ierr /= 0) stop 'an error occurred while reading the parameter file: LOCAL_TMP_PATH'
+ ! user output
call read_value_integer(NTSTEP_BETWEEN_OUTPUT_INFO, 'solver.NTSTEP_BETWEEN_OUTPUT_INFO', ierr)
if (ierr /= 0) stop 'an error occurred while reading the parameter file: NTSTEP_BETWEEN_OUTPUT_INFO'
call read_value_integer(NTSTEP_BETWEEN_OUTPUT_SEISMOS, 'solver.NTSTEP_BETWEEN_OUTPUT_SEISMOS', ierr)
@@ -247,6 +249,29 @@
stop 'an error occurred while reading the parameter file'
endif
+ ! ignore EXACT_MASS_MATRIX_FOR_ROTATION if rotation is not included in the simulations
+ if(.not. ROTATION) EXACT_MASS_MATRIX_FOR_ROTATION = .false.
+
+ ! produces simulations compatible with old globe version 5.1.5
+ if( USE_VERSION_5_1_5 ) then
+ if( .not. ATTENUATION_1D_WITH_3D_STORAGE ) then
+ print*,'setting ATTENUATION_1D_WITH_3D_STORAGE to .true. for compatibility with globe version 5.1.5 '
+ ATTENUATION_1D_WITH_3D_STORAGE = .true.
+ endif
+ if( UNDO_ATTENUATION ) then
+ print*,'setting UNDO_ATTENUATION to .false. for compatibility with globe version 5.1.5 '
+ UNDO_ATTENUATION = .false.
+ endif
+ if( USE_LDDRK ) then
+ print*,'setting USE_LDDRK to .false. for compatibility with globe version 5.1.5 '
+ USE_LDDRK = .false.
+ endif
+ if( EXACT_MASS_MATRIX_FOR_ROTATION ) then
+ print*,'setting EXACT_MASS_MATRIX_FOR_ROTATION to .false. for compatibility with globe version 5.1.5 '
+ EXACT_MASS_MATRIX_FOR_ROTATION = .false.
+ endif
+ endif
+
!daniel debug: status of implementation
!! DK DK July 2013: temporary, the time for Matthieu Lefebvre to merge his ADIOS implementation
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_add_sources.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_add_sources.f90 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_add_sources.f90 2013-08-30 12:45:44 UTC (rev 22746)
@@ -60,7 +60,12 @@
f0 = hdur(isource) !! using hdur as a FREQUENCY just to avoid changing CMTSOLUTION file format
! This is the expression of a Ricker; should be changed according maybe to the Par_file.
- stf_used = FACTOR_FORCE_SOURCE * comp_source_time_function_rickr(dble(it-1)*DT-t0-tshift_cmt(isource),f0)
+ if(USE_LDDRK)then
+ stf_used = FACTOR_FORCE_SOURCE * &
+ comp_source_time_function_rickr(dble(it-1)*DT + dble(C_LDDRK(istage))*DT-t0-tshift_cmt(isource),f0)
+ else
+ stf_used = FACTOR_FORCE_SOURCE * comp_source_time_function_rickr(dble(it-1)*DT-t0-tshift_cmt(isource),f0)
+ endif
! we use a force in a single direction along one of the components:
! x/y/z or E/N/Z-direction would correspond to 1/2/3 = COMPONENT_FORCE_SOURCE
@@ -69,7 +74,12 @@
+ sngl( nu_source(COMPONENT_FORCE_SOURCE,:,isource) ) * stf_used
else
- stf = comp_source_time_function(dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+ if(USE_LDDRK)then
+ stf = comp_source_time_function(dble(it-1)*DT + &
+ dble(C_LDDRK(istage))*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+ else
+ stf = comp_source_time_function(dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+ endif
! distinguish between single and double precision for reals
if(CUSTOM_REAL == SIZE_REAL) then
@@ -103,13 +113,23 @@
! prepares buffer with source time function values, to be copied onto GPU
if(USE_FORCE_POINT_SOURCE) then
do isource = 1,NSOURCES
- stf_pre_compute(isource) = &
- FACTOR_FORCE_SOURCE * comp_source_time_function_rickr(dble(it-1)*DT-t0-tshift_cmt(isource),f0)
+ if(USE_LDDRK)then
+ stf_pre_compute(isource) = FACTOR_FORCE_SOURCE * &
+ comp_source_time_function_rickr(dble(it-1)*DT + dble(C_LDDRK(istage))*DT-t0-tshift_cmt(isource),f0)
+ else
+ stf_pre_compute(isource) = &
+ FACTOR_FORCE_SOURCE * comp_source_time_function_rickr(dble(it-1)*DT-t0-tshift_cmt(isource),f0)
+ endif
enddo
else
do isource = 1,NSOURCES
- stf_pre_compute(isource) = &
- comp_source_time_function(dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+ if(USE_LDDRK)then
+ stf_pre_compute(isource) = comp_source_time_function(dble(it-1)*DT + &
+ dble(C_LDDRK(istage))*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+ else
+ stf_pre_compute(isource) = &
+ comp_source_time_function(dble(it-1)*DT-t0-tshift_cmt(isource),hdur_gaussian(isource))
+ endif
enddo
endif
! adds sources: only implements SIMTYPE=1 and NOISE_TOM=0
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_boundary_kernel.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_boundary_kernel.f90 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_boundary_kernel.f90 2013-08-30 12:45:44 UTC (rev 22746)
@@ -138,7 +138,7 @@
! Moho
if (.not. SUPPRESS_CRUSTAL_MESH .and. HONOR_1D_SPHERICAL_MOHO) then
- call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
+ call compute_boundary_kernel_depth(displ_crust_mantle,accel_crust_mantle, &
b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
@@ -154,7 +154,7 @@
c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
k_top,ibelm_moho_top,normal_moho,moho_kl_top,fluid_solid_boundary,NSPEC2D_MOHO)
- call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
+ call compute_boundary_kernel_depth(displ_crust_mantle,accel_crust_mantle, &
b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
@@ -174,7 +174,7 @@
endif
! 400
- call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
+ call compute_boundary_kernel_depth(displ_crust_mantle,accel_crust_mantle, &
b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
@@ -190,7 +190,7 @@
c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
k_top,ibelm_400_top,normal_400,d400_kl_top,fluid_solid_boundary,NSPEC2D_400)
- call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
+ call compute_boundary_kernel_depth(displ_crust_mantle,accel_crust_mantle, &
b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
@@ -209,7 +209,7 @@
d400_kl = d400_kl + (d400_kl_top - d400_kl_bot) * deltat
! 670
- call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
+ call compute_boundary_kernel_depth(displ_crust_mantle,accel_crust_mantle, &
b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
@@ -225,7 +225,7 @@
c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
k_top,ibelm_670_top,normal_670,d670_kl_top,fluid_solid_boundary,NSPEC2D_670)
- call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
+ call compute_boundary_kernel_depth(displ_crust_mantle,accel_crust_mantle, &
b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
@@ -247,7 +247,7 @@
fluid_solid_boundary = .true.
iregion_code = IREGION_CRUST_MANTLE
- call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
+ call compute_boundary_kernel_depth(displ_crust_mantle,accel_crust_mantle, &
b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
@@ -270,7 +270,7 @@
allocate(dummy_ispec_is_tiso(NSPEC_OUTER_CORE))
dummy_ispec_is_tiso(:) = .false.
- call compute_boundary_kernel(vector_displ_outer_core,vector_accel_outer_core, &
+ call compute_boundary_kernel_depth(vector_displ_outer_core,vector_accel_outer_core, &
b_vector_displ_outer_core,nspec_outer_core, &
iregion_code,ystore_outer_core,zstore_outer_core,ibool_outer_core,dummy_ispec_is_tiso, &
xix_outer_core,xiy_outer_core,xiz_outer_core, &
@@ -291,7 +291,7 @@
! ICB
fluid_solid_boundary = .true.
- call compute_boundary_kernel(vector_displ_outer_core,vector_accel_outer_core, &
+ call compute_boundary_kernel_depth(vector_displ_outer_core,vector_accel_outer_core, &
b_vector_displ_outer_core,nspec_outer_core, &
iregion_code,ystore_outer_core,zstore_outer_core,ibool_outer_core,dummy_ispec_is_tiso, &
xix_outer_core,xiy_outer_core,xiz_outer_core, &
@@ -317,7 +317,7 @@
allocate(dummy_ispec_is_tiso(NSPEC_INNER_CORE))
dummy_ispec_is_tiso(:) = .false.
- call compute_boundary_kernel(displ_inner_core,accel_inner_core, &
+ call compute_boundary_kernel_depth(displ_inner_core,accel_inner_core, &
b_displ_inner_core,nspec_inner_core,iregion_code, &
ystore_inner_core,zstore_inner_core,ibool_inner_core,dummy_ispec_is_tiso, &
xix_inner_core,xiy_inner_core,xiz_inner_core, &
@@ -343,15 +343,15 @@
!-------------------------------------------------------------------------------------------------
!
- subroutine compute_boundary_kernel(displ,accel,b_displ,nspec,iregion_code, &
- ystore,zstore,ibool,ispec_is_tiso, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_yy,hprime_zz, &
- rhostore,kappavstore,muvstore,kappahstore,muhstore,eta_anisostore, &
- c11store,c12store,c13store,c14store,c15store,c16store,c22store, &
- c23store,c24store,c25store,c26store,c33store,c34store,c35store, &
- c36store,c44store,c45store,c46store,c55store,c56store,c66store, &
- k_disc,ibelm_disc,normal_disc,b_kl,fluid_solid_boundary,NSPEC2D_DISC)
+ subroutine compute_boundary_kernel_depth(displ,accel,b_displ,nspec,iregion_code, &
+ ystore,zstore,ibool,ispec_is_tiso, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_yy,hprime_zz, &
+ rhostore,kappavstore,muvstore,kappahstore,muhstore,eta_anisostore, &
+ c11store,c12store,c13store,c14store,c15store,c16store,c22store, &
+ c23store,c24store,c25store,c26store,c33store,c34store,c35store, &
+ c36store,c44store,c45store,c46store,c55store,c56store,c66store, &
+ k_disc,ibelm_disc,normal_disc,b_kl,fluid_solid_boundary,NSPEC2D_DISC)
use constants
@@ -543,7 +543,7 @@
enddo
- end subroutine compute_boundary_kernel
+ end subroutine compute_boundary_kernel_depth
! ==========================================================================================
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_coupling.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_coupling.f90 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_coupling.f90 2013-08-30 12:45:44 UTC (rev 22746)
@@ -25,12 +25,12 @@
!
!=====================================================================
- subroutine compute_coupling_fluid_CMB(displ_crust_mantle,b_displ_crust_mantle, &
+ subroutine compute_coupling_fluid_CMB(displ_crust_mantle, &
ibool_crust_mantle,ibelm_bottom_crust_mantle, &
- accel_outer_core,b_accel_outer_core, &
+ accel_outer_core, &
normal_top_outer_core,jacobian2D_top_outer_core, &
wgllwgll_xy,ibool_outer_core,ibelm_top_outer_core, &
- SIMULATION_TYPE,nspec2D_top)
+ nspec2D_top)
use constants_solver
@@ -38,14 +38,11 @@
real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE) :: &
displ_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE_ADJOINT) :: &
- b_displ_crust_mantle
integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: ibool_crust_mantle
integer, dimension(NSPEC2D_BOTTOM_CM) :: ibelm_bottom_crust_mantle
real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE) :: accel_outer_core
- real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE_ADJOINT) :: b_accel_outer_core
real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_TOP_OC) :: normal_top_outer_core
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_TOP_OC) :: jacobian2D_top_outer_core
@@ -54,8 +51,7 @@
integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE) :: ibool_outer_core
integer, dimension(NSPEC2D_TOP_OC) :: ibelm_top_outer_core
- integer SIMULATION_TYPE
- integer nspec2D_top
+ integer :: nspec2D_top
! local parameters
real(kind=CUSTOM_REAL) :: displ_x,displ_y,displ_z,displ_n,nx,ny,nz,weight
@@ -98,21 +94,6 @@
! update fluid acceleration/pressure
accel_outer_core(iglob_oc) = accel_outer_core(iglob_oc) + weight*displ_n
-
- if (SIMULATION_TYPE == 3) then
- ! get displacement in crust mantle
- iglob_cm = ibool_crust_mantle(i,j,k_corresp,ispec_selected)
- displ_x = b_displ_crust_mantle(1,iglob_cm)
- displ_y = b_displ_crust_mantle(2,iglob_cm)
- displ_z = b_displ_crust_mantle(3,iglob_cm)
-
- displ_n = displ_x*nx + displ_y*ny + displ_z*nz
-
- ! update fluid acceleration/pressure
- iglob_oc = ibool_outer_core(i,j,k,ispec)
- b_accel_outer_core(iglob_oc) = b_accel_outer_core(iglob_oc) + weight*displ_n
- endif
-
enddo
enddo
enddo
@@ -123,12 +104,12 @@
!-------------------------------------------------------------------------------------------------
!
- subroutine compute_coupling_fluid_ICB(displ_inner_core,b_displ_inner_core, &
- ibool_inner_core,ibelm_top_inner_core, &
- accel_outer_core,b_accel_outer_core, &
- normal_bottom_outer_core,jacobian2D_bottom_outer_core, &
- wgllwgll_xy,ibool_outer_core,ibelm_bottom_outer_core, &
- SIMULATION_TYPE,nspec_bottom)
+ subroutine compute_coupling_fluid_ICB(displ_inner_core, &
+ ibool_inner_core,ibelm_top_inner_core, &
+ accel_outer_core, &
+ normal_bottom_outer_core,jacobian2D_bottom_outer_core, &
+ wgllwgll_xy,ibool_outer_core,ibelm_bottom_outer_core, &
+ nspec_bottom)
use constants_solver
@@ -136,14 +117,11 @@
real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE) :: &
displ_inner_core
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE_ADJOINT) :: &
- b_displ_inner_core
integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: ibool_inner_core
integer, dimension(NSPEC2D_TOP_IC) :: ibelm_top_inner_core
real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE) :: accel_outer_core
- real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE_ADJOINT) :: b_accel_outer_core
real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_BOTTOM_OC) :: normal_bottom_outer_core
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_BOTTOM_OC) :: jacobian2D_bottom_outer_core
@@ -152,8 +130,7 @@
integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE) :: ibool_outer_core
integer, dimension(NSPEC2D_BOTTOM_OC) :: ibelm_bottom_outer_core
- integer SIMULATION_TYPE
- integer nspec_bottom
+ integer :: nspec_bottom
! local parameters
real(kind=CUSTOM_REAL) :: displ_x,displ_y,displ_z,displ_n,nx,ny,nz,weight
@@ -196,22 +173,6 @@
! update fluid acceleration/pressure
accel_outer_core(iglob_oc) = accel_outer_core(iglob_oc) - weight*displ_n
-
- if (SIMULATION_TYPE == 3) then
- ! get displacement in inner core
- iglob_ic = ibool_inner_core(i,j,k_corresp,ispec_selected)
- displ_x = b_displ_inner_core(1,iglob_ic)
- displ_y = b_displ_inner_core(2,iglob_ic)
- displ_z = b_displ_inner_core(3,iglob_ic)
-
- displ_n = displ_x*nx + displ_y*ny + displ_z*nz
-
- ! update fluid acceleration/pressure
- iglob_oc = ibool_outer_core(i,j,k,ispec)
- b_accel_outer_core(iglob_oc) = b_accel_outer_core(iglob_oc) - weight*displ_n
-
- endif
-
enddo
enddo
enddo
@@ -222,14 +183,14 @@
!-------------------------------------------------------------------------------------------------
!
- subroutine compute_coupling_CMB_fluid(displ_crust_mantle,b_displ_crust_mantle, &
- accel_crust_mantle,b_accel_crust_mantle, &
- ibool_crust_mantle,ibelm_bottom_crust_mantle, &
- accel_outer_core,b_accel_outer_core, &
- normal_top_outer_core,jacobian2D_top_outer_core, &
- wgllwgll_xy,ibool_outer_core,ibelm_top_outer_core, &
- RHO_TOP_OC,minus_g_cmb, &
- SIMULATION_TYPE,nspec_bottom)
+ subroutine compute_coupling_CMB_fluid(displ_crust_mantle, &
+ accel_crust_mantle, &
+ ibool_crust_mantle,ibelm_bottom_crust_mantle, &
+ accel_outer_core, &
+ normal_top_outer_core,jacobian2D_top_outer_core, &
+ wgllwgll_xy,ibool_outer_core,ibelm_top_outer_core, &
+ RHO_TOP_OC,minus_g_cmb, &
+ nspec_bottom)
use constants_solver
@@ -237,14 +198,11 @@
real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE) :: &
displ_crust_mantle,accel_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE_ADJOINT) :: &
- b_displ_crust_mantle,b_accel_crust_mantle
integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: ibool_crust_mantle
integer, dimension(NSPEC2D_BOTTOM_CM) :: ibelm_bottom_crust_mantle
real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE) :: accel_outer_core
- real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE_ADJOINT) :: b_accel_outer_core
real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_TOP_OC) :: normal_top_outer_core
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_TOP_OC) :: jacobian2D_top_outer_core
@@ -253,11 +211,10 @@
integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE) :: ibool_outer_core
integer, dimension(NSPEC2D_TOP_OC) :: ibelm_top_outer_core
- double precision RHO_TOP_OC
- real(kind=CUSTOM_REAL) minus_g_cmb
+ double precision :: RHO_TOP_OC
+ real(kind=CUSTOM_REAL) :: minus_g_cmb
- integer SIMULATION_TYPE
- integer nspec_bottom
+ integer :: nspec_bottom
! local parameters
real(kind=CUSTOM_REAL) :: pressure,nx,ny,nz,weight
@@ -302,20 +259,6 @@
accel_crust_mantle(1,iglob_mantle) = accel_crust_mantle(1,iglob_mantle) + weight*nx*pressure
accel_crust_mantle(2,iglob_mantle) = accel_crust_mantle(2,iglob_mantle) + weight*ny*pressure
accel_crust_mantle(3,iglob_mantle) = accel_crust_mantle(3,iglob_mantle) + weight*nz*pressure
-
- if (SIMULATION_TYPE == 3) then
- if(GRAVITY_VAL) then
- pressure = RHO_TOP_OC * (- b_accel_outer_core(iglob_oc) &
- + minus_g_cmb *(b_displ_crust_mantle(1,iglob_mantle)*nx &
- + b_displ_crust_mantle(2,iglob_mantle)*ny + b_displ_crust_mantle(3,iglob_mantle)*nz))
- else
- pressure = - RHO_TOP_OC * b_accel_outer_core(iglob_oc)
- endif
- b_accel_crust_mantle(1,iglob_mantle) = b_accel_crust_mantle(1,iglob_mantle) + weight*nx*pressure
- b_accel_crust_mantle(2,iglob_mantle) = b_accel_crust_mantle(2,iglob_mantle) + weight*ny*pressure
- b_accel_crust_mantle(3,iglob_mantle) = b_accel_crust_mantle(3,iglob_mantle) + weight*nz*pressure
- endif
-
enddo
enddo
enddo
@@ -327,14 +270,13 @@
!-------------------------------------------------------------------------------------------------
!
- subroutine compute_coupling_ICB_fluid(displ_inner_core,b_displ_inner_core, &
- accel_inner_core,b_accel_inner_core, &
- ibool_inner_core,ibelm_top_inner_core, &
- accel_outer_core,b_accel_outer_core, &
- normal_bottom_outer_core,jacobian2D_bottom_outer_core, &
- wgllwgll_xy,ibool_outer_core,ibelm_bottom_outer_core, &
- RHO_BOTTOM_OC,minus_g_icb, &
- SIMULATION_TYPE,nspec2D_top)
+ subroutine compute_coupling_ICB_fluid(displ_inner_core,accel_inner_core, &
+ ibool_inner_core,ibelm_top_inner_core, &
+ accel_outer_core, &
+ normal_bottom_outer_core,jacobian2D_bottom_outer_core, &
+ wgllwgll_xy,ibool_outer_core,ibelm_bottom_outer_core, &
+ RHO_BOTTOM_OC,minus_g_icb, &
+ nspec2D_top)
use constants_solver
@@ -342,14 +284,11 @@
real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE) :: &
displ_inner_core,accel_inner_core
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE_ADJOINT) :: &
- b_displ_inner_core,b_accel_inner_core
integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: ibool_inner_core
integer, dimension(NSPEC2D_TOP_IC) :: ibelm_top_inner_core
real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE) :: accel_outer_core
- real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE_ADJOINT) :: b_accel_outer_core
real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_BOTTOM_OC) :: normal_bottom_outer_core
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_BOTTOM_OC) :: jacobian2D_bottom_outer_core
@@ -358,11 +297,10 @@
integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE) :: ibool_outer_core
integer, dimension(NSPEC2D_BOTTOM_OC) :: ibelm_bottom_outer_core
- double precision RHO_BOTTOM_OC
- real(kind=CUSTOM_REAL) minus_g_icb
+ double precision :: RHO_BOTTOM_OC
+ real(kind=CUSTOM_REAL) :: minus_g_icb
- integer SIMULATION_TYPE
- integer nspec2D_top
+ integer :: nspec2D_top
! local parameters
real(kind=CUSTOM_REAL) :: pressure,nx,ny,nz,weight
@@ -407,19 +345,6 @@
accel_inner_core(2,iglob_inner_core) = accel_inner_core(2,iglob_inner_core) - weight*ny*pressure
accel_inner_core(3,iglob_inner_core) = accel_inner_core(3,iglob_inner_core) - weight*nz*pressure
- if (SIMULATION_TYPE == 3) then
- if(GRAVITY_VAL) then
- pressure = RHO_BOTTOM_OC * (- b_accel_outer_core(iglob) &
- + minus_g_icb *(b_displ_inner_core(1,iglob_inner_core)*nx &
- + b_displ_inner_core(2,iglob_inner_core)*ny + b_displ_inner_core(3,iglob_inner_core)*nz))
- else
- pressure = - RHO_BOTTOM_OC * b_accel_outer_core(iglob)
- endif
- b_accel_inner_core(1,iglob_inner_core) = b_accel_inner_core(1,iglob_inner_core) - weight*nx*pressure
- b_accel_inner_core(2,iglob_inner_core) = b_accel_inner_core(2,iglob_inner_core) - weight*ny*pressure
- b_accel_inner_core(3,iglob_inner_core) = b_accel_inner_core(3,iglob_inner_core) - weight*nz*pressure
- endif
-
enddo
enddo
enddo
@@ -435,10 +360,10 @@
rmass_ocean_load,normal_top_crust_mantle, &
ibool_crust_mantle,ibelm_top_crust_mantle, &
updated_dof_ocean_load,NGLOB_XY, &
- nspec_top, &
- ABSORBING_CONDITIONS,EXACT_MASS_MATRIX_FOR_ROTATION,USE_LDDRK)
+ nspec_top)
use constants_solver
+ use specfem_par,only: ABSORBING_CONDITIONS,EXACT_MASS_MATRIX_FOR_ROTATION,USE_LDDRK
implicit none
@@ -464,9 +389,8 @@
integer, dimension(NSPEC2D_TOP_CM) :: ibelm_top_crust_mantle
logical, dimension(NGLOB_CRUST_MANTLE_OCEANS) :: updated_dof_ocean_load
- logical :: ABSORBING_CONDITIONS,EXACT_MASS_MATRIX_FOR_ROTATION,USE_LDDRK
- integer nspec_top
+ integer :: nspec_top
! local parameters
real(kind=CUSTOM_REAL) :: force_normal_comp
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_acoustic_calling_routine.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_acoustic_calling_routine.F90 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_acoustic_calling_routine.F90 2013-08-30 12:45:44 UTC (rev 22746)
@@ -27,40 +27,33 @@
subroutine compute_forces_acoustic()
+! acoustic domains for forward or adjoint simulations (SIMULATION_TYPE == 1 or 2 )
+
use specfem_par
- use specfem_par_crustmantle,only: displ_crust_mantle,b_displ_crust_mantle, &
+ use specfem_par_crustmantle,only: displ_crust_mantle, &
ibool_crust_mantle,ibelm_bottom_crust_mantle
- use specfem_par_innercore,only: displ_inner_core,b_displ_inner_core, &
+ use specfem_par_innercore,only: displ_inner_core, &
ibool_inner_core,ibelm_top_inner_core
use specfem_par_outercore
implicit none
! local parameters
- real(kind=CUSTOM_REAL) :: time,b_time
+ real(kind=CUSTOM_REAL) :: time
! non blocking MPI
! iphase: iphase = 1 is for computing outer elements in the outer_core,
! iphase = 2 is for computing inner elements in the outer core (former icall parameter)
integer :: iphase
logical :: phase_is_inner
+ ! checks
+ if( SIMULATION_TYPE == 3 ) return
+
! compute internal forces in the fluid region
if(CUSTOM_REAL == SIZE_REAL) then
time = sngl((dble(it-1)*DT-t0)*scale_t_inv)
else
time = (dble(it-1)*DT-t0)*scale_t_inv
endif
- if (SIMULATION_TYPE == 3) then
- ! note on backward/reconstructed wavefields:
- ! b_time for b_displ( it=1 ) corresponds to (NSTEP - 1)*DT - t0 (after Newmark scheme...)
- ! as we start with saved wavefields b_displ( 1 ) <-> displ( NSTEP ) which correspond
- ! to a time (NSTEP - (it-1) - 1)*DT - t0
- ! for reconstructing the rotational contributions
- if(CUSTOM_REAL == SIZE_REAL) then
- b_time = sngl((dble(NSTEP-it)*DT-t0)*scale_t_inv)
- else
- b_time = (dble(NSTEP-it)*DT-t0)*scale_t_inv
- endif
- endif
! ****************************************************
! big loop over all spectral elements in the fluid
@@ -97,29 +90,10 @@
displ_outer_core,accel_outer_core, &
div_displ_outer_core,phase_is_inner)
endif
-
- ! adjoint / kernel runs
- if (SIMULATION_TYPE == 3) then
- if( USE_DEVILLE_PRODUCTS_VAL ) then
- ! uses Deville et al. (2002) routine
- call compute_forces_outer_core_Dev(b_time,b_deltat,b_two_omega_earth, &
- NSPEC_OUTER_CORE_ROT_ADJOINT,NGLOB_OUTER_CORE_ADJOINT, &
- b_A_array_rotation,b_B_array_rotation, &
- b_displ_outer_core,b_accel_outer_core, &
- b_div_displ_outer_core,phase_is_inner)
- else
- call compute_forces_outer_core(b_time,b_deltat,b_two_omega_earth, &
- NSPEC_OUTER_CORE_ROT_ADJOINT,NGLOB_OUTER_CORE_ADJOINT, &
- b_A_array_rotation,b_B_array_rotation, &
- b_displ_outer_core,b_accel_outer_core, &
- b_div_displ_outer_core,phase_is_inner)
- endif
- endif
-
else
! on GPU
- ! includes both forward and adjoint/kernel simulations
- call compute_forces_outer_core_cuda(Mesh_pointer,iphase,time,b_time)
+ ! includes FORWARD_OR_ADJOINT == 1
+ call compute_forces_outer_core_cuda(Mesh_pointer,iphase,time,1)
endif
@@ -127,7 +101,7 @@
if( iphase == 1 ) then
! Stacey absorbing boundaries
- if(NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS) call compute_stacey_outer_core()
+ if(NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS) call compute_stacey_outer_core_forward()
! ****************************************************
! ********** add matching with solid part **********
@@ -139,23 +113,23 @@
!--- couple with mantle at the top of the outer core
!---
if(ACTUALLY_COUPLE_FLUID_CMB) &
- call compute_coupling_fluid_CMB(displ_crust_mantle,b_displ_crust_mantle, &
- ibool_crust_mantle,ibelm_bottom_crust_mantle, &
- accel_outer_core,b_accel_outer_core, &
- normal_top_outer_core,jacobian2D_top_outer_core, &
- wgllwgll_xy,ibool_outer_core,ibelm_top_outer_core, &
- SIMULATION_TYPE,NSPEC2D_TOP(IREGION_OUTER_CORE))
+ call compute_coupling_fluid_CMB(displ_crust_mantle, &
+ ibool_crust_mantle,ibelm_bottom_crust_mantle, &
+ accel_outer_core, &
+ normal_top_outer_core,jacobian2D_top_outer_core, &
+ wgllwgll_xy,ibool_outer_core,ibelm_top_outer_core, &
+ NSPEC2D_TOP(IREGION_OUTER_CORE))
!---
!--- couple with inner core at the bottom of the outer core
!---
if(ACTUALLY_COUPLE_FLUID_ICB) &
- call compute_coupling_fluid_ICB(displ_inner_core,b_displ_inner_core, &
- ibool_inner_core,ibelm_top_inner_core, &
- accel_outer_core,b_accel_outer_core, &
- normal_bottom_outer_core,jacobian2D_bottom_outer_core, &
- wgllwgll_xy,ibool_outer_core,ibelm_bottom_outer_core, &
- SIMULATION_TYPE,NSPEC2D_BOTTOM(IREGION_OUTER_CORE))
+ call compute_coupling_fluid_ICB(displ_inner_core, &
+ ibool_inner_core,ibelm_top_inner_core, &
+ accel_outer_core, &
+ normal_bottom_outer_core,jacobian2D_bottom_outer_core, &
+ wgllwgll_xy,ibool_outer_core,ibelm_bottom_outer_core, &
+ NSPEC2D_BOTTOM(IREGION_OUTER_CORE))
else
! on GPU
@@ -163,12 +137,12 @@
!--- couple with mantle at the top of the outer core
!---
if( ACTUALLY_COUPLE_FLUID_CMB ) &
- call compute_coupling_fluid_cmb_cuda(Mesh_pointer)
+ call compute_coupling_fluid_cmb_cuda(Mesh_pointer,1)
!---
!--- couple with inner core at the bottom of the outer core
!---
if( ACTUALLY_COUPLE_FLUID_ICB ) &
- call compute_coupling_fluid_icb_cuda(Mesh_pointer)
+ call compute_coupling_fluid_icb_cuda(Mesh_pointer,1)
endif
endif ! iphase == 1
@@ -177,7 +151,6 @@
! in outer core
if( iphase == 1 ) then
! sends out MPI interface data (non-blocking)
-
if(.NOT. GPU_MODE) then
! on CPU
call assemble_MPI_scalar_s(NPROCTOT_VAL,NGLOB_OUTER_CORE, &
@@ -198,31 +171,6 @@
request_send_scalar_oc,request_recv_scalar_oc, &
1) ! <-- 1 == fwd accel
endif
-
- ! adjoint simulations
- if( SIMULATION_TYPE == 3 ) then
- if(.NOT. GPU_MODE) then
- ! on CPU
- call assemble_MPI_scalar_s(NPROCTOT_VAL,NGLOB_OUTER_CORE, &
- b_accel_outer_core, &
- b_buffer_send_scalar_outer_core,b_buffer_recv_scalar_outer_core, &
- num_interfaces_outer_core,max_nibool_interfaces_oc, &
- nibool_interfaces_outer_core,ibool_interfaces_outer_core,&
- my_neighbours_outer_core, &
- b_request_send_scalar_oc,b_request_recv_scalar_oc)
- else
- ! on GPU
- ! outer core
- call assemble_MPI_scalar_send_cuda(Mesh_pointer,NPROCTOT_VAL, &
- b_buffer_send_scalar_outer_core,b_buffer_recv_scalar_outer_core, &
- num_interfaces_outer_core,max_nibool_interfaces_oc, &
- nibool_interfaces_outer_core,&
- my_neighbours_outer_core, &
- b_request_send_scalar_oc,b_request_recv_scalar_oc, &
- 3) ! <-- 3 == adjoint b_accel
- endif ! GPU
- endif ! SIMULATION_TYPE == 3
-
else
! make sure the last communications are finished and processed
! waits for send/receive requests to be completed and assembles values
@@ -242,26 +190,6 @@
request_send_scalar_oc,request_recv_scalar_oc, &
1) ! <-- 1 == fwd accel
endif
-
- ! adjoint simulations
- if( SIMULATION_TYPE == 3 ) then
- if(.NOT. GPU_MODE) then
- ! on CPU
- call assemble_MPI_scalar_w(NPROCTOT_VAL,NGLOB_OUTER_CORE, &
- b_accel_outer_core, &
- b_buffer_recv_scalar_outer_core,num_interfaces_outer_core,&
- max_nibool_interfaces_oc, &
- nibool_interfaces_outer_core,ibool_interfaces_outer_core, &
- b_request_send_scalar_oc,b_request_recv_scalar_oc)
- else
- ! on GPU
- call assemble_MPI_scalar_write_cuda(Mesh_pointer,NPROCTOT_VAL, &
- b_buffer_recv_scalar_outer_core, &
- num_interfaces_outer_core,max_nibool_interfaces_oc, &
- b_request_send_scalar_oc,b_request_recv_scalar_oc, &
- 3) ! <-- 3 == adjoint b_accel
- endif
- endif ! SIMULATION_TYPE == 3
endif ! iphase == 1
enddo ! iphase
@@ -273,18 +201,209 @@
! on CPU
call update_veloc_acoustic(NGLOB_OUTER_CORE,veloc_outer_core,accel_outer_core, &
deltatover2,rmass_outer_core)
+ else
+ ! on GPU
+ ! includes FORWARD_OR_ADJOINT == 1
+ call kernel_3_outer_core_cuda(Mesh_pointer,deltatover2,1)
+ endif
- ! adjoint / kernel runs
- if (SIMULATION_TYPE == 3) &
- call update_veloc_acoustic(NGLOB_OUTER_CORE_ADJOINT,b_veloc_outer_core,b_accel_outer_core, &
- b_deltatover2,rmass_outer_core)
+ end subroutine compute_forces_acoustic
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+ subroutine compute_forces_acoustic_backward()
+
+! backward/reconstructed wavefields only
+
+ use specfem_par
+ use specfem_par_crustmantle,only: b_displ_crust_mantle, &
+ ibool_crust_mantle,ibelm_bottom_crust_mantle
+ use specfem_par_innercore,only: b_displ_inner_core, &
+ ibool_inner_core,ibelm_top_inner_core
+ use specfem_par_outercore
+ implicit none
+
+ ! local parameters
+ real(kind=CUSTOM_REAL) :: b_time
+ ! non blocking MPI
+ ! iphase: iphase = 1 is for computing outer elements in the outer_core,
+ ! iphase = 2 is for computing inner elements in the outer core (former icall parameter)
+ integer :: iphase
+ logical :: phase_is_inner
+
+ ! checks
+ if( SIMULATION_TYPE /= 3 ) return
+
+ ! compute internal forces in the fluid region
+
+ ! note on backward/reconstructed wavefields:
+ ! b_time for b_displ( it=1 ) corresponds to (NSTEP - 1)*DT - t0 (after Newmark scheme...)
+ ! as we start with saved wavefields b_displ( 1 ) <-> displ( NSTEP ) which correspond
+ ! to a time (NSTEP - (it-1) - 1)*DT - t0
+ ! for reconstructing the rotational contributions
+ if(CUSTOM_REAL == SIZE_REAL) then
+ b_time = sngl((dble(NSTEP-it)*DT-t0)*scale_t_inv)
else
+ b_time = (dble(NSTEP-it)*DT-t0)*scale_t_inv
+ endif
+
+ ! ****************************************************
+ ! big loop over all spectral elements in the fluid
+ ! ****************************************************
+
+ ! distinguishes two runs: for elements on MPI interfaces, and elements within the partitions
+ do iphase=1,2
+
+ ! first, iphase == 1 for points on MPI interfaces (thus outer elements)
+ ! second, iphase == 2 for points purely inside partition (thus inner elements)
+ !
+ ! compute all the outer elements first, then sends out non blocking MPI communication
+ ! and continues computing inner elements (overlapping)
+ if( iphase == 1 ) then
+ phase_is_inner = .false.
+ else
+ phase_is_inner = .true.
+ endif
+
+ if( .not. GPU_MODE ) then
+ ! on CPU
+ ! adjoint / kernel runs
+ if( USE_DEVILLE_PRODUCTS_VAL ) then
+ ! uses Deville et al. (2002) routine
+ call compute_forces_outer_core_Dev(b_time,b_deltat,b_two_omega_earth, &
+ NSPEC_OUTER_CORE_ROT_ADJOINT,NGLOB_OUTER_CORE_ADJOINT, &
+ b_A_array_rotation,b_B_array_rotation, &
+ b_displ_outer_core,b_accel_outer_core, &
+ b_div_displ_outer_core,phase_is_inner)
+ else
+ call compute_forces_outer_core(b_time,b_deltat,b_two_omega_earth, &
+ NSPEC_OUTER_CORE_ROT_ADJOINT,NGLOB_OUTER_CORE_ADJOINT, &
+ b_A_array_rotation,b_B_array_rotation, &
+ b_displ_outer_core,b_accel_outer_core, &
+ b_div_displ_outer_core,phase_is_inner)
+ endif
+ else
+ ! on GPU
+ ! includes FORWARD_OR_ADJOINT == 3
+ call compute_forces_outer_core_cuda(Mesh_pointer,iphase,b_time,3)
+ endif
+
+
+ ! computes additional contributions to acceleration field
+ if( iphase == 1 ) then
+
+ ! Stacey absorbing boundaries
+ if(NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS) call compute_stacey_outer_core_backward()
+
+ ! ****************************************************
+ ! ********** add matching with solid part **********
+ ! ****************************************************
+ ! only for elements in first matching layer in the fluid
+ if( .not. GPU_MODE ) then
+ ! on CPU
+ !---
+ !--- couple with mantle at the top of the outer core
+ !---
+ if(ACTUALLY_COUPLE_FLUID_CMB) &
+ call compute_coupling_fluid_CMB(b_displ_crust_mantle, &
+ ibool_crust_mantle,ibelm_bottom_crust_mantle, &
+ b_accel_outer_core, &
+ normal_top_outer_core,jacobian2D_top_outer_core, &
+ wgllwgll_xy,ibool_outer_core,ibelm_top_outer_core, &
+ NSPEC2D_TOP(IREGION_OUTER_CORE))
+
+ !---
+ !--- couple with inner core at the bottom of the outer core
+ !---
+ if(ACTUALLY_COUPLE_FLUID_ICB) &
+ call compute_coupling_fluid_ICB(b_displ_inner_core, &
+ ibool_inner_core,ibelm_top_inner_core, &
+ b_accel_outer_core, &
+ normal_bottom_outer_core,jacobian2D_bottom_outer_core, &
+ wgllwgll_xy,ibool_outer_core,ibelm_bottom_outer_core, &
+ NSPEC2D_BOTTOM(IREGION_OUTER_CORE))
+ else
+ ! on GPU
+ !---
+ !--- couple with mantle at the top of the outer core
+ !---
+ if( ACTUALLY_COUPLE_FLUID_CMB ) &
+ call compute_coupling_fluid_cmb_cuda(Mesh_pointer,3)
+ !---
+ !--- couple with inner core at the bottom of the outer core
+ !---
+ if( ACTUALLY_COUPLE_FLUID_ICB ) &
+ call compute_coupling_fluid_icb_cuda(Mesh_pointer,3)
+
+ endif
+ endif ! iphase == 1
+
+ ! assemble all the contributions between slices using MPI
+ ! in outer core
+ if( iphase == 1 ) then
+ ! sends out MPI interface data (non-blocking)
+ ! adjoint simulations
+ if(.NOT. GPU_MODE) then
+ ! on CPU
+ call assemble_MPI_scalar_s(NPROCTOT_VAL,NGLOB_OUTER_CORE, &
+ b_accel_outer_core, &
+ b_buffer_send_scalar_outer_core,b_buffer_recv_scalar_outer_core, &
+ num_interfaces_outer_core,max_nibool_interfaces_oc, &
+ nibool_interfaces_outer_core,ibool_interfaces_outer_core,&
+ my_neighbours_outer_core, &
+ b_request_send_scalar_oc,b_request_recv_scalar_oc)
+ else
+ ! on GPU
+ ! outer core
+ call assemble_MPI_scalar_send_cuda(Mesh_pointer,NPROCTOT_VAL, &
+ b_buffer_send_scalar_outer_core,b_buffer_recv_scalar_outer_core, &
+ num_interfaces_outer_core,max_nibool_interfaces_oc, &
+ nibool_interfaces_outer_core,&
+ my_neighbours_outer_core, &
+ b_request_send_scalar_oc,b_request_recv_scalar_oc, &
+ 3) ! <-- 3 == adjoint b_accel
+ endif ! GPU
+ else
+ ! make sure the last communications are finished and processed
+ ! waits for send/receive requests to be completed and assembles values
+ ! adjoint simulations
+ if(.NOT. GPU_MODE) then
+ ! on CPU
+ call assemble_MPI_scalar_w(NPROCTOT_VAL,NGLOB_OUTER_CORE, &
+ b_accel_outer_core, &
+ b_buffer_recv_scalar_outer_core,num_interfaces_outer_core,&
+ max_nibool_interfaces_oc, &
+ nibool_interfaces_outer_core,ibool_interfaces_outer_core, &
+ b_request_send_scalar_oc,b_request_recv_scalar_oc)
+ else
+ ! on GPU
+ call assemble_MPI_scalar_write_cuda(Mesh_pointer,NPROCTOT_VAL, &
+ b_buffer_recv_scalar_outer_core, &
+ num_interfaces_outer_core,max_nibool_interfaces_oc, &
+ b_request_send_scalar_oc,b_request_recv_scalar_oc, &
+ 3) ! <-- 3 == adjoint b_accel
+ endif
+ endif ! iphase == 1
+
+ enddo ! iphase
+
+ ! Newmark time scheme:
+ ! corrector terms for fluid parts
+ ! (multiply by the inverse of the mass matrix and update velocity)
+ if(.NOT. GPU_MODE) then
+ ! on CPU
+ ! adjoint / kernel runs
+ call update_veloc_acoustic(NGLOB_OUTER_CORE_ADJOINT,b_veloc_outer_core,b_accel_outer_core, &
+ b_deltatover2,rmass_outer_core)
+ else
! on GPU
- call kernel_3_outer_core_cuda(Mesh_pointer, &
- deltatover2,SIMULATION_TYPE,b_deltatover2)
+ ! includes FORWARD_OR_ADJOINT == 3
+ call kernel_3_outer_core_cuda(Mesh_pointer,b_deltatover2,3)
endif
- end subroutine compute_forces_acoustic
+ end subroutine compute_forces_acoustic_backward
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle_Dev.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle_Dev.F90 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle_Dev.F90 2013-08-30 12:45:44 UTC (rev 22746)
@@ -69,6 +69,10 @@
nspec_outer => nspec_outer_crust_mantle, &
nspec_inner => nspec_inner_crust_mantle
+#ifdef FORCE_VECTORIZATION
+ use specfem_par,only: wgllwgll_xy_3D,wgllwgll_xz_3D,wgllwgll_yz_3D
+#endif
+
!daniel: att - debug
! use specfem_par,only: it,NSTEP
@@ -421,8 +425,7 @@
sum_terms(2,ijk,1,1) = - (fac1*newtempy1(ijk,1,1) + fac2*newtempy2(ijk,1,1) + fac3*newtempy3(ijk,1,1))
sum_terms(3,ijk,1,1) = - (fac1*newtempz1(ijk,1,1) + fac2*newtempz2(ijk,1,1) + fac3*newtempz3(ijk,1,1))
enddo
-
- ! add gravity terms
+ ! adds gravity terms
if(GRAVITY_VAL) then
do ijk = 1,NDIM*NGLLCUBE
sum_terms(ijk,1,1,1) = sum_terms(ijk,1,1,1) + rho_s_H(ijk,1,1,1)
@@ -435,15 +438,12 @@
do i=1,NGLLX
fac2 = wgllwgll_xz(i,k)
fac3 = wgllwgll_xy(i,j)
-
- ! sum contributions
+ ! sums contributions
sum_terms(1,i,j,k) = - (fac1*newtempx1(i,j,k) + fac2*newtempx2(i,j,k) + fac3*newtempx3(i,j,k))
sum_terms(2,i,j,k) = - (fac1*newtempy1(i,j,k) + fac2*newtempy2(i,j,k) + fac3*newtempy3(i,j,k))
sum_terms(3,i,j,k) = - (fac1*newtempz1(i,j,k) + fac2*newtempz2(i,j,k) + fac3*newtempz3(i,j,k))
-
- ! add gravity terms
+ ! adds gravity terms
if(GRAVITY_VAL) sum_terms(:,i,j,k) = sum_terms(:,i,j,k) + rho_s_H(:,i,j,k)
-
enddo ! NGLLX
enddo ! NGLLY
enddo ! NGLLZ
@@ -507,23 +507,12 @@
! save deviatoric strain for Runge-Kutta scheme
if(COMPUTE_AND_STORE_STRAIN) then
-
-#ifdef FORCE_VECTORIZATION
- do ijk = 1,NGLLCUBE
- epsilondev_xx(ijk,1,1,ispec) = epsilondev_loc(1,ijk,1,1)
- epsilondev_yy(ijk,1,1,ispec) = epsilondev_loc(2,ijk,1,1)
- epsilondev_xy(ijk,1,1,ispec) = epsilondev_loc(3,ijk,1,1)
- epsilondev_xz(ijk,1,1,ispec) = epsilondev_loc(4,ijk,1,1)
- epsilondev_yz(ijk,1,1,ispec) = epsilondev_loc(5,ijk,1,1)
- enddo
-#else
epsilondev_xx(:,:,:,ispec) = epsilondev_loc(1,:,:,:)
epsilondev_yy(:,:,:,ispec) = epsilondev_loc(2,:,:,:)
epsilondev_xy(:,:,:,ispec) = epsilondev_loc(3,:,:,:)
epsilondev_xz(:,:,:,ispec) = epsilondev_loc(4,:,:,:)
epsilondev_yz(:,:,:,ispec) = epsilondev_loc(5,:,:,:)
endif
-#endif
enddo ! of spectral element loop NSPEC_CRUST_MANTLE
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle_noDev.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle_noDev.f90 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle_noDev.f90 2013-08-30 12:45:44 UTC (rev 22746)
@@ -273,12 +273,13 @@
endif
! precompute terms for attenuation if needed
- if( ATTENUATION_VAL .and. (ATTENUATION_3D_VAL .or. ATTENUATION_1D_WITH_3D_STORAGE_VAL)) then
- one_minus_sum_beta_use = one_minus_sum_beta(i,j,k,ispec)
+ if( ATTENUATION_VAL ) then
+ if( ATTENUATION_3D_VAL .or. ATTENUATION_1D_WITH_3D_STORAGE_VAL ) then
+ one_minus_sum_beta_use = one_minus_sum_beta(i,j,k,ispec)
+ else
+ one_minus_sum_beta_use = one_minus_sum_beta(1,1,1,ispec)
+ endif
minus_sum_beta = one_minus_sum_beta_use - 1.0_CUSTOM_REAL
- else if( ATTENUATION_VAL ) then
- one_minus_sum_beta_use = one_minus_sum_beta(1,1,1,ispec)
- minus_sum_beta = one_minus_sum_beta_use - 1.0_CUSTOM_REAL
endif
!
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core_Dev.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core_Dev.F90 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core_Dev.F90 2013-08-30 12:45:44 UTC (rev 22746)
@@ -62,6 +62,10 @@
nspec_outer => nspec_outer_inner_core, &
nspec_inner => nspec_inner_inner_core
+#ifdef FORCE_VECTORIZATION
+ use specfem_par,only: wgllwgll_xy_3D,wgllwgll_xz_3D,wgllwgll_yz_3D
+#endif
+
implicit none
integer :: NSPEC,NGLOB,NSPEC_ATT
@@ -171,6 +175,10 @@
integer :: num_elements,ispec_p
integer :: iphase
+#ifdef FORCE_VECTORIZATION
+ integer :: ijk
+#endif
+
! ****************************************************
! big loop over all spectral elements in the solid
! ****************************************************
@@ -334,12 +342,6 @@
epsilondev_loc(5,i,j,k) = 0.5 * duzdyl_plus_duydzl
endif
- if(ATTENUATION_VAL .and. (ATTENUATION_3D_VAL .or. ATTENUATION_1D_WITH_3D_STORAGE_VAL)) then
- minus_sum_beta = one_minus_sum_beta(i,j,k,ispec) - 1.0_CUSTOM_REAL
- else if( ATTENUATION_VAL ) then
- minus_sum_beta = one_minus_sum_beta(1,1,1,ispec) - 1.0_CUSTOM_REAL
- endif
-
if(ANISOTROPIC_INNER_CORE_VAL) then
! elastic tensor for hexagonal symmetry in reduced notation:
!
@@ -365,6 +367,11 @@
! use unrelaxed parameters if attenuation
if(ATTENUATION_VAL) then
+ if( ATTENUATION_3D_VAL .or. ATTENUATION_1D_WITH_3D_STORAGE_VAL ) then
+ minus_sum_beta = one_minus_sum_beta(i,j,k,ispec) - 1.0_CUSTOM_REAL
+ else
+ minus_sum_beta = one_minus_sum_beta(1,1,1,ispec) - 1.0_CUSTOM_REAL
+ endif
mul = muvstore(i,j,k,ispec)
c11l = c11l + FOUR_THIRDS * minus_sum_beta * mul
c12l = c12l - TWO_THIRDS * minus_sum_beta * mul
@@ -387,10 +394,12 @@
mul = muvstore(i,j,k,ispec)
! use unrelaxed parameters if attenuation
- if(ATTENUATION_VAL .and. (ATTENUATION_3D_VAL .or. ATTENUATION_1D_WITH_3D_STORAGE_VAL)) then
- mul = mul * one_minus_sum_beta(i,j,k,ispec)
- else if( ATTENUATION_VAL ) then
- mul = mul * one_minus_sum_beta(1,1,1,ispec)
+ if(ATTENUATION_VAL ) then
+ if( ATTENUATION_3D_VAL .or. ATTENUATION_1D_WITH_3D_STORAGE_VAL ) then
+ mul = mul * one_minus_sum_beta(i,j,k,ispec)
+ else
+ mul = mul * one_minus_sum_beta(1,1,1,ispec)
+ endif
endif
lambdalplus2mul = kappal + FOUR_THIRDS * mul
@@ -623,6 +632,21 @@
enddo
enddo
+#ifdef FORCE_VECTORIZATION
+ do ijk=1,NGLLCUBE
+ fac1 = wgllwgll_yz_3D(ijk,1,1)
+ fac2 = wgllwgll_xz_3D(ijk,1,1)
+ fac3 = wgllwgll_xy_3D(ijk,1,1)
+ sum_terms(1,ijk,1,1) = - (fac1*newtempx1(ijk,1,1) + fac2*newtempx2(ijk,1,1) + fac3*newtempx3(ijk,1,1))
+ sum_terms(2,ijk,1,1) = - (fac1*newtempy1(ijk,1,1) + fac2*newtempy2(ijk,1,1) + fac3*newtempy3(ijk,1,1))
+ sum_terms(3,ijk,1,1) = - (fac1*newtempz1(ijk,1,1) + fac2*newtempz2(ijk,1,1) + fac3*newtempz3(ijk,1,1))
+ enddo
+ if(GRAVITY_VAL) then
+ do ijk = 1,NDIM*NGLLCUBE
+ sum_terms(ijk,1,1,1) = sum_terms(ijk,1,1,1) + rho_s_H(ijk,1,1,1)
+ enddo
+ endif
+#else
do k=1,NGLLZ
do j=1,NGLLY
fac1 = wgllwgll_yz(j,k)
@@ -637,8 +661,26 @@
enddo
enddo
enddo
+#endif
! sum contributions from each element to the global mesh and add gravity terms
+#ifdef FORCE_VECTORIZATION
+! we can force vectorization using a compiler directive here because we know that there is no dependency
+! inside a given spectral element, since all the global points of a local element are different by definition
+! (only common points between different elements can be the same)
+! IBM, Portland PGI, and Intel and Cray syntax (Intel and Cray are the same)
+!IBM* ASSERT (NODEPS)
+!pgi$ ivdep
+!DIR$ IVDEP
+ do ijk = 1,NGLLCUBE
+ iglob = ibool(ijk,1,1,ispec)
+ ! do NOT use array syntax ":" for the three statements below
+ ! otherwise most compilers will not be able to vectorize the outer loop
+ accel_inner_core(1,iglob) = accel_inner_core(1,iglob) + sum_terms(1,ijk,1,1)
+ accel_inner_core(2,iglob) = accel_inner_core(2,iglob) + sum_terms(2,ijk,1,1)
+ accel_inner_core(3,iglob) = accel_inner_core(3,iglob) + sum_terms(3,ijk,1,1)
+ enddo
+#else
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
@@ -647,6 +689,7 @@
enddo
enddo
enddo
+#endif
! use Runge-Kutta scheme to march memory variables in time
! convention for attenuation
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core_noDev.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core_noDev.f90 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core_noDev.f90 2013-08-30 12:45:44 UTC (rev 22746)
@@ -215,7 +215,7 @@
gammayl = gammay(i,j,k,ispec)
gammazl = gammaz(i,j,k,ispec)
-! compute the jacobian
+ ! compute the jacobian
jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
- xiyl*(etaxl*gammazl-etazl*gammaxl) &
+ xizl*(etaxl*gammayl-etayl*gammaxl))
@@ -232,7 +232,7 @@
duzdyl = xiyl*tempz1l + etayl*tempz2l + gammayl*tempz3l
duzdzl = xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l
-! precompute some sums to save CPU time
+ ! precompute some sums to save CPU time
duxdxl_plus_duydyl = duxdxl + duydyl
duxdxl_plus_duzdzl = duxdxl + duzdzl
duydyl_plus_duzdzl = duydyl + duzdzl
@@ -255,40 +255,36 @@
epsilondev_loc(5,i,j,k) = 0.5 * duzdyl_plus_duydzl
endif
- ! precompute terms for attenuation if needed
- if( ATTENUATION_VAL .and. (ATTENUATION_3D_VAL .or. ATTENUATION_1D_WITH_3D_STORAGE_VAL)) then
- minus_sum_beta = one_minus_sum_beta(i,j,k,ispec) - 1.0_CUSTOM_REAL
- else if( ATTENUATION_VAL ) then
- minus_sum_beta = one_minus_sum_beta(1,1,1,ispec) - 1.0_CUSTOM_REAL
- endif
-
if(ANISOTROPIC_INNER_CORE_VAL) then
-
-! elastic tensor for hexagonal symmetry in reduced notation:
-!
-! c11 c12 c13 0 0 0
-! c12 c11 c13 0 0 0
-! c13 c13 c33 0 0 0
-! 0 0 0 c44 0 0
-! 0 0 0 0 c44 0
-! 0 0 0 0 0 (c11-c12)/2
-!
-! in terms of the A, C, L, N and F of Love (1927):
-!
-! c11 = A
-! c12 = A-2N
-! c13 = F
-! c33 = C
-! c44 = L
-
+ ! elastic tensor for hexagonal symmetry in reduced notation:
+ !
+ ! c11 c12 c13 0 0 0
+ ! c12 c11 c13 0 0 0
+ ! c13 c13 c33 0 0 0
+ ! 0 0 0 c44 0 0
+ ! 0 0 0 0 c44 0
+ ! 0 0 0 0 0 (c11-c12)/2
+ !
+ ! in terms of the A, C, L, N and F of Love (1927):
+ !
+ ! c11 = A
+ ! c12 = A-2N
+ ! c13 = F
+ ! c33 = C
+ ! c44 = L
c11l = c11store(i,j,k,ispec)
c12l = c12store(i,j,k,ispec)
c13l = c13store(i,j,k,ispec)
c33l = c33store(i,j,k,ispec)
c44l = c44store(i,j,k,ispec)
-! use unrelaxed parameters if attenuation
+ ! use unrelaxed parameters if attenuation
if(ATTENUATION_VAL) then
+ if( ATTENUATION_3D_VAL .or. ATTENUATION_1D_WITH_3D_STORAGE_VAL ) then
+ minus_sum_beta = one_minus_sum_beta(i,j,k,ispec) - 1.0_CUSTOM_REAL
+ else
+ minus_sum_beta = one_minus_sum_beta(1,1,1,ispec) - 1.0_CUSTOM_REAL
+ endif
mul = muvstore(i,j,k,ispec)
c11l = c11l + FOUR_THIRDS * minus_sum_beta * mul
c12l = c12l - TWO_THIRDS * minus_sum_beta * mul
@@ -305,23 +301,24 @@
sigma_yz = c44l*duzdyl_plus_duydzl
else
-! inner core with no anisotropy, use kappav and muv for instance
-! layer with no anisotropy, use kappav and muv for instance
+ ! inner core with no anisotropy, use kappav and muv for instance
+ ! layer with no anisotropy, use kappav and muv for instance
kappal = kappavstore(i,j,k,ispec)
mul = muvstore(i,j,k,ispec)
! use unrelaxed parameters if attenuation
- if( ATTENUATION_VAL .and. (ATTENUATION_3D_VAL .or. ATTENUATION_1D_WITH_3D_STORAGE_VAL)) then
- mul = mul * one_minus_sum_beta(i,j,k,ispec)
- else if( ATTENUATION_VAL ) then
- mul = mul * one_minus_sum_beta(1,1,1,ispec)
+ if( ATTENUATION_VAL ) then
+ if( ATTENUATION_3D_VAL .or. ATTENUATION_1D_WITH_3D_STORAGE_VAL ) then
+ mul = mul * one_minus_sum_beta(i,j,k,ispec)
+ else
+ mul = mul * one_minus_sum_beta(1,1,1,ispec)
+ endif
endif
lambdalplus2mul = kappal + FOUR_THIRDS * mul
lambdal = lambdalplus2mul - 2.*mul
-! compute stress sigma
-
+ ! compute stress sigma
sigma_xx = lambdalplus2mul*duxdxl + lambdal*duydyl_plus_duzdzl
sigma_yy = lambdalplus2mul*duydyl + lambdal*duxdxl_plus_duzdzl
sigma_zz = lambdalplus2mul*duzdzl + lambdal*duxdxl_plus_duydyl
@@ -332,7 +329,7 @@
endif
-! subtract memory variables if attenuation
+ ! subtract memory variables if attenuation
if( ATTENUATION_VAL .and. .not. PARTIAL_PHYS_DISPERSION_ONLY_VAL ) then
do i_SLS = 1,N_SLS
R_xx_val = R_xx(i_SLS,i,j,k,ispec)
@@ -346,24 +343,23 @@
enddo
endif
-! define symmetric components of sigma for gravity
+ ! define symmetric components of sigma for gravity
sigma_yx = sigma_xy
sigma_zx = sigma_xz
sigma_zy = sigma_yz
-! compute non-symmetric terms for gravity
+ ! compute non-symmetric terms for gravity
if(GRAVITY_VAL) then
-! use mesh coordinates to get theta and phi
-! x y and z contain r theta and phi
-
+ ! use mesh coordinates to get theta and phi
+ ! x y and z contain r theta and phi
iglob = ibool(i,j,k,ispec)
radius = dble(xstore(iglob))
theta = dble(ystore(iglob))
phi = dble(zstore(iglob))
-! make sure radius is never zero even for points at center of cube
-! because we later divide by radius
+ ! make sure radius is never zero even for points at center of cube
+ ! because we later divide by radius
if(radius < 100.d0 / R_EARTH) radius = 100.d0 / R_EARTH
cos_theta = dcos(theta)
@@ -371,23 +367,22 @@
cos_phi = dcos(phi)
sin_phi = dsin(phi)
-! get g, rho and dg/dr=dg
-! spherical components of the gravitational acceleration
-! for efficiency replace with lookup table every 100 m in radial direction
-! make sure we never use zero for point exactly at the center of the Earth
+ ! get g, rho and dg/dr=dg
+ ! spherical components of the gravitational acceleration
+ ! for efficiency replace with lookup table every 100 m in radial direction
+ ! make sure we never use zero for point exactly at the center of the Earth
int_radius = max(1,nint(radius * R_EARTH_KM * 10.d0))
minus_g = minus_gravity_table(int_radius)
minus_dg = minus_deriv_gravity_table(int_radius)
rho = density_table(int_radius)
-! Cartesian components of the gravitational acceleration
+ ! Cartesian components of the gravitational acceleration
gxl = minus_g*sin_theta*cos_phi
gyl = minus_g*sin_theta*sin_phi
gzl = minus_g*cos_theta
-! Cartesian components of gradient of gravitational acceleration
-! obtained from spherical components
-
+ ! Cartesian components of gradient of gravitational acceleration
+ ! obtained from spherical components
minus_g_over_radius = minus_g / radius
minus_dg_plus_g_over_radius = minus_dg - minus_g_over_radius
@@ -405,15 +400,15 @@
iglob = ibool(i,j,k,ispec)
-! distinguish between single and double precision for reals
+ ! distinguish between single and double precision for reals
if(CUSTOM_REAL == SIZE_REAL) then
-! get displacement and multiply by density to compute G tensor
+ ! get displacement and multiply by density to compute G tensor
sx_l = rho * dble(displ_inner_core(1,iglob))
sy_l = rho * dble(displ_inner_core(2,iglob))
sz_l = rho * dble(displ_inner_core(3,iglob))
-! compute G tensor from s . g and add to sigma (not symmetric)
+ ! compute G tensor from s . g and add to sigma (not symmetric)
sigma_xx = sigma_xx + sngl(sy_l*gyl + sz_l*gzl)
sigma_yy = sigma_yy + sngl(sx_l*gxl + sz_l*gzl)
sigma_zz = sigma_zz + sngl(sx_l*gxl + sy_l*gyl)
@@ -427,7 +422,7 @@
sigma_yz = sigma_yz - sngl(sy_l * gzl)
sigma_zy = sigma_zy - sngl(sz_l * gyl)
-! precompute vector
+ ! precompute vector
factor = dble(jacobianl) * wgll_cube(i,j,k)
rho_s_H(1,i,j,k) = sngl(factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl))
rho_s_H(2,i,j,k) = sngl(factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl))
@@ -435,12 +430,12 @@
else
-! get displacement and multiply by density to compute G tensor
+ ! get displacement and multiply by density to compute G tensor
sx_l = rho * displ_inner_core(1,iglob)
sy_l = rho * displ_inner_core(2,iglob)
sz_l = rho * displ_inner_core(3,iglob)
-! compute G tensor from s . g and add to sigma (not symmetric)
+ ! compute G tensor from s . g and add to sigma (not symmetric)
sigma_xx = sigma_xx + sy_l*gyl + sz_l*gzl
sigma_yy = sigma_yy + sx_l*gxl + sz_l*gzl
sigma_zz = sigma_zz + sx_l*gxl + sy_l*gyl
@@ -454,7 +449,7 @@
sigma_yz = sigma_yz - sy_l * gzl
sigma_zy = sigma_zy - sz_l * gyl
-! precompute vector
+ ! precompute vector
factor = jacobianl * wgll_cube(i,j,k)
rho_s_H(1,i,j,k) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
rho_s_H(2,i,j,k) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
@@ -464,8 +459,7 @@
endif ! end of section with gravity terms
-! form dot product with test vector, non-symmetric form
-
+ ! form dot product with test vector, non-symmetric form
tempx1(i,j,k) = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl)
tempy1(i,j,k) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl)
tempz1(i,j,k) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl)
@@ -533,7 +527,7 @@
enddo
enddo
-! sum contributions from each element to the global mesh and add gravity terms
+ ! sum contributions from each element to the global mesh and add gravity terms
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
@@ -598,8 +592,7 @@
endif
if (COMPUTE_AND_STORE_STRAIN) then
-! save deviatoric strain for Runge-Kutta scheme
- !epsilondev(:,:,:,:,ispec) = epsilondev_loc(:,:,:,:)
+ ! save deviatoric strain for Runge-Kutta scheme
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
@@ -611,7 +604,6 @@
enddo
enddo
enddo
-
endif
endif ! end test to exclude fictitious elements in central cube
Copied: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core_Dev.F90 (from rev 22734, seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core_Dev.f90)
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core_Dev.F90 (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core_Dev.F90 2013-08-30 12:45:44 UTC (rev 22746)
@@ -0,0 +1,456 @@
+!=====================================================================
+!
+! S p e c f e m 3 D G l o b e V e r s i o n 6 . 0
+! --------------------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA
+! and CNRS / INRIA / University of Pau, France
+! (c) Princeton University and CNRS / INRIA / University of Pau
+! August 2013
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+ subroutine compute_forces_outer_core_Dev(time,deltat,two_omega_earth, &
+ NSPEC,NGLOB, &
+ A_array_rotation,B_array_rotation, &
+ displfluid,accelfluid, &
+ div_displfluid,phase_is_inner)
+
+! this routine is optimized for NGLLX = NGLLY = NGLLZ = 5 using the Deville et al. (2002) inlined matrix-matrix products
+
+ use constants_solver
+
+ use specfem_par,only: &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT,wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
+ minus_rho_g_over_kappa_fluid,d_ln_density_dr_table, &
+ MOVIE_VOLUME
+
+ use specfem_par_outercore,only: &
+ xstore => xstore_outer_core,ystore => ystore_outer_core,zstore => zstore_outer_core, &
+ xix => xix_outer_core,xiy => xiy_outer_core,xiz => xiz_outer_core, &
+ etax => etax_outer_core,etay => etay_outer_core,etaz => etaz_outer_core, &
+ gammax => gammax_outer_core,gammay => gammay_outer_core,gammaz => gammaz_outer_core, &
+ ibool => ibool_outer_core, &
+ phase_ispec_inner => phase_ispec_inner_outer_core, &
+ nspec_outer => nspec_outer_outer_core, &
+ nspec_inner => nspec_inner_outer_core
+
+#ifdef FORCE_VECTORIZATION
+ use specfem_par,only: wgllwgll_xy_3D,wgllwgll_xz_3D,wgllwgll_yz_3D
+#endif
+
+ implicit none
+
+ integer :: NSPEC,NGLOB
+
+ ! for the Euler scheme for rotation
+ real(kind=CUSTOM_REAL) time,deltat,two_omega_earth
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC) :: &
+ A_array_rotation,B_array_rotation
+
+ ! displacement and acceleration
+ real(kind=CUSTOM_REAL), dimension(NGLOB) :: displfluid,accelfluid
+
+ ! divergence of displacement
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ADJOINT) :: div_displfluid
+
+ ! inner/outer element run flag
+ logical :: phase_is_inner
+
+ ! local parameters
+
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: tempx1,tempx2,tempx3
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: newtempx1,newtempx2,newtempx3
+ ! for gravity
+ integer :: int_radius
+ double precision :: radius,theta,phi,gxl,gyl,gzl
+ double precision :: cos_theta,sin_theta,cos_phi,sin_phi
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: gravity_term
+ ! for the Euler scheme for rotation
+ real(kind=CUSTOM_REAL) :: two_omega_deltat,cos_two_omega_t,sin_two_omega_t,A_rotation,B_rotation, &
+ ux_rotation,uy_rotation,dpotentialdx_with_rot,dpotentialdy_with_rot
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: source_euler_A,source_euler_B
+
+ integer :: ispec,iglob
+ integer :: i,j,k
+ real(kind=CUSTOM_REAL) :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
+ real(kind=CUSTOM_REAL) :: dpotentialdxl,dpotentialdyl,dpotentialdzl
+ real(kind=CUSTOM_REAL) :: sum_terms
+
+ ! manually inline the calls to the Deville et al. (2002) routines
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc
+
+ real(kind=CUSTOM_REAL), dimension(NGLLX,m2) :: B1_m1_m2_5points
+ real(kind=CUSTOM_REAL), dimension(m1,m2) :: C1_m1_m2_5points
+ real(kind=CUSTOM_REAL), dimension(m1,m2) :: E1_m1_m2_5points
+
+ equivalence(dummyx_loc,B1_m1_m2_5points)
+ equivalence(tempx1,C1_m1_m2_5points)
+ equivalence(newtempx1,E1_m1_m2_5points)
+
+ real(kind=CUSTOM_REAL), dimension(m2,NGLLX) :: A1_mxm_m2_m1_5points
+ real(kind=CUSTOM_REAL), dimension(m2,m1) :: C1_mxm_m2_m1_5points
+ real(kind=CUSTOM_REAL), dimension(m2,m1) :: E1_mxm_m2_m1_5points
+
+ equivalence(dummyx_loc,A1_mxm_m2_m1_5points)
+ equivalence(tempx3,C1_mxm_m2_m1_5points)
+ equivalence(newtempx3,E1_mxm_m2_m1_5points)
+
+ double precision, dimension(NGLLX,NGLLY,NGLLZ) :: temp_gxl,temp_gyl,temp_gzl
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
+ displ_times_grad_x_ln_rho,displ_times_grad_y_ln_rho,displ_times_grad_z_ln_rho
+
+! integer :: computed_elements
+ integer :: num_elements,ispec_p
+ integer :: iphase
+
+! ****************************************************
+! big loop over all spectral elements in the fluid
+! ****************************************************
+
+ if (NSPEC_OUTER_CORE_ADJOINT /= 1 .and. (.not. phase_is_inner) ) div_displfluid(:,:,:,:) = 0._CUSTOM_REAL
+
+! computed_elements = 0
+ if( .not. phase_is_inner ) then
+ iphase = 1
+ num_elements = nspec_outer
+ else
+ iphase = 2
+ num_elements = nspec_inner
+ endif
+
+ do ispec_p = 1,num_elements
+
+ ispec = phase_ispec_inner(ispec_p,iphase)
+
+    ! only compute elements which belong to the current phase (inner or outer elements)
+
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+ iglob = ibool(i,j,k,ispec)
+
+ ! stores "displacement"
+ dummyx_loc(i,j,k) = displfluid(iglob)
+
+ ! pre-computes factors
+ ! use mesh coordinates to get theta and phi
+ ! x y z contain r theta phi
+ radius = dble(xstore(iglob))
+ theta = dble(ystore(iglob))
+ phi = dble(zstore(iglob))
+
+ cos_theta = dcos(theta)
+ sin_theta = dsin(theta)
+ cos_phi = dcos(phi)
+ sin_phi = dsin(phi)
+
+ int_radius = nint(radius * R_EARTH_KM * 10.d0)
+
+ if( .not. GRAVITY_VAL ) then
+ ! grad(rho)/rho in Cartesian components
+ displ_times_grad_x_ln_rho(i,j,k) = dummyx_loc(i,j,k) &
+ * sngl(sin_theta * cos_phi * d_ln_density_dr_table(int_radius))
+ displ_times_grad_y_ln_rho(i,j,k) = dummyx_loc(i,j,k) &
+ * sngl(sin_theta * sin_phi * d_ln_density_dr_table(int_radius))
+ displ_times_grad_z_ln_rho(i,j,k) = dummyx_loc(i,j,k) &
+ * sngl(cos_theta * d_ln_density_dr_table(int_radius))
+ else
+ ! Cartesian components of the gravitational acceleration
+ ! integrate and multiply by rho / Kappa
+ temp_gxl(i,j,k) = sin_theta*cos_phi
+ temp_gyl(i,j,k) = sin_theta*sin_phi
+ temp_gzl(i,j,k) = cos_theta
+ endif
+
+ enddo
+ enddo
+ enddo
+
+ ! subroutines adapted from Deville, Fischer and Mund, High-order methods
+ ! for incompressible fluid flow, Cambridge University Press (2002),
+ ! pages 386 and 389 and Figure 8.3.1
+ do j=1,m2
+ do i=1,m1
+ C1_m1_m2_5points(i,j) = hprime_xx(i,1)*B1_m1_m2_5points(1,j) + &
+ hprime_xx(i,2)*B1_m1_m2_5points(2,j) + &
+ hprime_xx(i,3)*B1_m1_m2_5points(3,j) + &
+ hprime_xx(i,4)*B1_m1_m2_5points(4,j) + &
+ hprime_xx(i,5)*B1_m1_m2_5points(5,j)
+ enddo
+ enddo
+
+ do k = 1,NGLLX
+ do j=1,m1
+ do i=1,m1
+ tempx2(i,j,k) = dummyx_loc(i,1,k)*hprime_xxT(1,j) + &
+ dummyx_loc(i,2,k)*hprime_xxT(2,j) + &
+ dummyx_loc(i,3,k)*hprime_xxT(3,j) + &
+ dummyx_loc(i,4,k)*hprime_xxT(4,j) + &
+ dummyx_loc(i,5,k)*hprime_xxT(5,j)
+ enddo
+ enddo
+ enddo
+
+ do j=1,m1
+ do i=1,m2
+ C1_mxm_m2_m1_5points(i,j) = A1_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
+ A1_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
+ A1_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
+ A1_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
+ A1_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
+ enddo
+ enddo
+
+
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+
+ ! get derivatives of velocity potential with respect to x, y and z
+ xixl = xix(i,j,k,ispec)
+ xiyl = xiy(i,j,k,ispec)
+ xizl = xiz(i,j,k,ispec)
+ etaxl = etax(i,j,k,ispec)
+ etayl = etay(i,j,k,ispec)
+ etazl = etaz(i,j,k,ispec)
+ gammaxl = gammax(i,j,k,ispec)
+ gammayl = gammay(i,j,k,ispec)
+ gammazl = gammaz(i,j,k,ispec)
+
+ ! compute the jacobian
+ jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
+ - xiyl*(etaxl*gammazl-etazl*gammaxl) &
+ + xizl*(etaxl*gammayl-etayl*gammaxl))
+
+ dpotentialdxl = xixl*tempx1(i,j,k) + etaxl*tempx2(i,j,k) + gammaxl*tempx3(i,j,k)
+ dpotentialdyl = xiyl*tempx1(i,j,k) + etayl*tempx2(i,j,k) + gammayl*tempx3(i,j,k)
+ dpotentialdzl = xizl*tempx1(i,j,k) + etazl*tempx2(i,j,k) + gammazl*tempx3(i,j,k)
+
+ ! compute contribution of rotation and add to gradient of potential
+ ! this term has no Z component
+ if(ROTATION_VAL) then
+
+ ! store the source for the Euler scheme for A_rotation and B_rotation
+ two_omega_deltat = deltat * two_omega_earth
+
+ cos_two_omega_t = cos(two_omega_earth*time)
+ sin_two_omega_t = sin(two_omega_earth*time)
+
+ ! time step deltat of Euler scheme is included in the source
+ source_euler_A(i,j,k) = two_omega_deltat &
+ * (cos_two_omega_t * dpotentialdyl + sin_two_omega_t * dpotentialdxl)
+ source_euler_B(i,j,k) = two_omega_deltat &
+ * (sin_two_omega_t * dpotentialdyl - cos_two_omega_t * dpotentialdxl)
+
+ A_rotation = A_array_rotation(i,j,k,ispec)
+ B_rotation = B_array_rotation(i,j,k,ispec)
+
+ ux_rotation = A_rotation*cos_two_omega_t + B_rotation*sin_two_omega_t
+ uy_rotation = - A_rotation*sin_two_omega_t + B_rotation*cos_two_omega_t
+
+ dpotentialdx_with_rot = dpotentialdxl + ux_rotation
+ dpotentialdy_with_rot = dpotentialdyl + uy_rotation
+
+ else
+
+ dpotentialdx_with_rot = dpotentialdxl
+ dpotentialdy_with_rot = dpotentialdyl
+
+ endif ! end of section with rotation
+
+ ! add (chi/rho)grad(rho) term in no gravity case
+ if(.not. GRAVITY_VAL) then
+
+ ! With regards to the non-gravitating case: we cannot set N^2 = 0 *and* let g = 0.
+ ! We can *either* assume N^2 = 0 but keep gravity g, *or* we can assume that gravity
+ ! is negligible to begin with, as in our GJI 2002a, in which case N does not arise.
+ ! We get:
+ !
+ ! \ddot\chi = \rho^{-1}\kappa\bdel\cdot(\bdel\chi+\chi\bdel\ln\rho)
+ !
+ ! Then the displacement is
+ !
+ ! \bu = \bdel\chi+\chi\bdel\ln\rho = \rho^{-1}\bdel(\rho\chi)
+ !
+ ! and the pressure is
+ !
+ ! p = -\rho\ddot{\chi}
+ !
+ ! Thus in our 2002b GJI paper eqn (21) is wrong, and equation (41)
+ ! in our AGU monograph is incorrect; these equations should be replaced by
+ !
+ ! \ddot\chi = \rho^{-1}\kappa\bdel\cdot(\bdel\chi+\chi\bdel\ln\rho)
+ !
+ ! Note that the fluid potential we use in GJI 2002a differs from the one used here:
+ !
+ ! \chi_GJI2002a = \rho\partial\t\chi
+ !
+ ! such that
+ !
+ ! \bv = \partial_t\bu=\rho^{-1}\bdel\chi_GJI2002a (GJI 2002a eqn 20)
+ !
+ ! p = - \partial_t\chi_GJI2002a (GJI 2002a eqn 19)
+
+ ! use mesh coordinates to get theta and phi
+ ! x y z contain r theta phi
+ dpotentialdx_with_rot = dpotentialdx_with_rot + displ_times_grad_x_ln_rho(i,j,k)
+ dpotentialdy_with_rot = dpotentialdy_with_rot + displ_times_grad_y_ln_rho(i,j,k)
+ dpotentialdzl = dpotentialdzl + displ_times_grad_z_ln_rho(i,j,k)
+
+ else ! if gravity is turned on
+
+          ! compute divergence of displacement
+ gxl = temp_gxl(i,j,k)
+ gyl = temp_gyl(i,j,k)
+ gzl = temp_gzl(i,j,k)
+
+ ! distinguish between single and double precision for reals
+ if(CUSTOM_REAL == SIZE_REAL) then
+ gravity_term(i,j,k) = &
+ sngl( minus_rho_g_over_kappa_fluid(int_radius) &
+ * dble(jacobianl) * wgll_cube(i,j,k) &
+ * (dble(dpotentialdx_with_rot) * gxl &
+ + dble(dpotentialdy_with_rot) * gyl &
+ + dble(dpotentialdzl) * gzl) )
+ else
+ gravity_term(i,j,k) = minus_rho_g_over_kappa_fluid(int_radius) * &
+ jacobianl * wgll_cube(i,j,k) &
+ * (dpotentialdx_with_rot * gxl &
+ + dpotentialdy_with_rot * gyl &
+ + dpotentialdzl * gzl)
+ endif
+
+ ! divergence of displacement field with gravity on
+ ! note: these calculations are only considered for SIMULATION_TYPE == 1 .and. SAVE_FORWARD
+ ! and one has set MOVIE_VOLUME_TYPE == 4 when MOVIE_VOLUME is .true.;
+ ! in case of SIMULATION_TYPE == 3, it gets overwritten by compute_kernels_outer_core()
+ if (NSPEC_OUTER_CORE_ADJOINT /= 1 .and. MOVIE_VOLUME) then
+ div_displfluid(i,j,k,ispec) = &
+ minus_rho_g_over_kappa_fluid(int_radius) &
+ * (dpotentialdx_with_rot * gxl &
+ + dpotentialdy_with_rot * gyl &
+ + dpotentialdzl * gzl)
+ endif
+
+ endif
+
+ tempx1(i,j,k) = jacobianl*(xixl*dpotentialdx_with_rot &
+ + xiyl*dpotentialdy_with_rot + xizl*dpotentialdzl)
+ tempx2(i,j,k) = jacobianl*(etaxl*dpotentialdx_with_rot &
+ + etayl*dpotentialdy_with_rot + etazl*dpotentialdzl)
+ tempx3(i,j,k) = jacobianl*(gammaxl*dpotentialdx_with_rot &
+ + gammayl*dpotentialdy_with_rot + gammazl*dpotentialdzl)
+
+ enddo
+ enddo
+ enddo
+
+ ! subroutines adapted from Deville, Fischer and Mund, High-order methods
+ ! for incompressible fluid flow, Cambridge University Press (2002),
+ ! pages 386 and 389 and Figure 8.3.1
+ do j=1,m2
+ do i=1,m1
+ E1_m1_m2_5points(i,j) = hprimewgll_xxT(i,1)*C1_m1_m2_5points(1,j) + &
+ hprimewgll_xxT(i,2)*C1_m1_m2_5points(2,j) + &
+ hprimewgll_xxT(i,3)*C1_m1_m2_5points(3,j) + &
+ hprimewgll_xxT(i,4)*C1_m1_m2_5points(4,j) + &
+ hprimewgll_xxT(i,5)*C1_m1_m2_5points(5,j)
+ enddo
+ enddo
+
+ do k = 1,NGLLX
+ do j=1,m1
+ do i=1,m1
+ newtempx2(i,j,k) = tempx2(i,1,k)*hprimewgll_xx(1,j) + &
+ tempx2(i,2,k)*hprimewgll_xx(2,j) + &
+ tempx2(i,3,k)*hprimewgll_xx(3,j) + &
+ tempx2(i,4,k)*hprimewgll_xx(4,j) + &
+ tempx2(i,5,k)*hprimewgll_xx(5,j)
+ enddo
+ enddo
+ enddo
+
+ do j=1,m1
+ do i=1,m2
+ E1_mxm_m2_m1_5points(i,j) = C1_mxm_m2_m1_5points(i,1)*hprimewgll_xx(1,j) + &
+ C1_mxm_m2_m1_5points(i,2)*hprimewgll_xx(2,j) + &
+ C1_mxm_m2_m1_5points(i,3)*hprimewgll_xx(3,j) + &
+ C1_mxm_m2_m1_5points(i,4)*hprimewgll_xx(4,j) + &
+ C1_mxm_m2_m1_5points(i,5)*hprimewgll_xx(5,j)
+ enddo
+ enddo
+
+#ifdef FORCE_VECTORIZATION
+ ! sum contributions from each element to the global mesh and add gravity term
+ do ijk=1,NGLLCUBE
+ sum_terms(ijk,1,1) = - (wgllwgll_yz_3D(ijk,1,1)*newtempx1(ijk,1,1) &
+ + wgllwgll_xz_3D(ijk,1,1)*newtempx2(ijk,1,1) &
+ + wgllwgll_xy_3D(ijk,1,1)*newtempx3(ijk,1,1))
+ enddo
+ if(GRAVITY_VAL) then
+ do ijk = 1,NGLLCUBE
+ sum_terms(ijk,1,1) = sum_terms(ijk,1,1) + gravity_term(ijk,1,1)
+ enddo
+ endif
+! we can force vectorization using a compiler directive here because we know that there is no dependency
+! inside a given spectral element, since all the global points of a local element are different by definition
+! (only common points between different elements can be the same)
+! IBM, Portland PGI, and Intel and Cray syntax (Intel and Cray are the same)
+!IBM* ASSERT (NODEPS)
+!pgi$ ivdep
+!DIR$ IVDEP
+ do ijk = 1,NGLLCUBE
+ iglob = ibool(ijk,1,1,ispec)
+ accelfluid(iglob) = accelfluid(iglob) + sum_terms(ijk,1,1)
+ enddo
+ ! update rotation term with Euler scheme
+ if(ROTATION_VAL) then
+ do ijk = 1,NGLLCUBE
+ A_array_rotation(ijk,1,1,ispec) = A_array_rotation(ijk,1,1,ispec) + source_euler_A(ijk,1,1)
+ B_array_rotation(ijk,1,1,ispec) = B_array_rotation(ijk,1,1,ispec) + source_euler_B(ijk,1,1)
+ enddo
+ endif
+#else
+ ! sum contributions from each element to the global mesh and add gravity term
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+ sum_terms = - (wgllwgll_yz(j,k)*newtempx1(i,j,k) &
+ + wgllwgll_xz(i,k)*newtempx2(i,j,k) &
+ + wgllwgll_xy(i,j)*newtempx3(i,j,k))
+
+ if(GRAVITY_VAL) sum_terms = sum_terms + gravity_term(i,j,k)
+
+ iglob = ibool(i,j,k,ispec)
+ accelfluid(iglob) = accelfluid(iglob) + sum_terms
+ enddo
+ enddo
+ enddo
+ ! update rotation term with Euler scheme
+ if(ROTATION_VAL) then
+ ! use the source saved above
+ A_array_rotation(:,:,:,ispec) = A_array_rotation(:,:,:,ispec) + source_euler_A(:,:,:)
+ B_array_rotation(:,:,:,ispec) = B_array_rotation(:,:,:,ispec) + source_euler_B(:,:,:)
+ endif
+#endif
+
+ enddo ! spectral element loop
+
+ end subroutine compute_forces_outer_core_Dev
+
Deleted: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core_Dev.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core_Dev.f90 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core_Dev.f90 2013-08-30 12:45:44 UTC (rev 22746)
@@ -1,423 +0,0 @@
-!=====================================================================
-!
-! S p e c f e m 3 D G l o b e V e r s i o n 6 . 0
-! --------------------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA
-! and CNRS / INRIA / University of Pau, France
-! (c) Princeton University and CNRS / INRIA / University of Pau
-! August 2013
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
- subroutine compute_forces_outer_core_Dev(time,deltat,two_omega_earth, &
- NSPEC,NGLOB, &
- A_array_rotation,B_array_rotation, &
- displfluid,accelfluid, &
- div_displfluid,phase_is_inner)
-
-! this routine is optimized for NGLLX = NGLLY = NGLLZ = 5 using the Deville et al. (2002) inlined matrix-matrix products
-
- use constants_solver
-
- use specfem_par,only: &
- hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT,wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
- minus_rho_g_over_kappa_fluid,d_ln_density_dr_table, &
- MOVIE_VOLUME
-
- use specfem_par_outercore,only: &
- xstore => xstore_outer_core,ystore => ystore_outer_core,zstore => zstore_outer_core, &
- xix => xix_outer_core,xiy => xiy_outer_core,xiz => xiz_outer_core, &
- etax => etax_outer_core,etay => etay_outer_core,etaz => etaz_outer_core, &
- gammax => gammax_outer_core,gammay => gammay_outer_core,gammaz => gammaz_outer_core, &
- ibool => ibool_outer_core, &
- phase_ispec_inner => phase_ispec_inner_outer_core, &
- nspec_outer => nspec_outer_outer_core, &
- nspec_inner => nspec_inner_outer_core
-
- implicit none
-
- integer :: NSPEC,NGLOB
-
- ! for the Euler scheme for rotation
- real(kind=CUSTOM_REAL) time,deltat,two_omega_earth
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC) :: &
- A_array_rotation,B_array_rotation
-
- ! displacement and acceleration
- real(kind=CUSTOM_REAL), dimension(NGLOB) :: displfluid,accelfluid
-
- ! divergence of displacement
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ADJOINT) :: div_displfluid
-
- ! inner/outer element run flag
- logical :: phase_is_inner
-
- ! local parameters
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: tempx1,tempx2,tempx3
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: newtempx1,newtempx2,newtempx3
- ! for gravity
- integer :: int_radius
- double precision :: radius,theta,phi,gxl,gyl,gzl
- double precision :: cos_theta,sin_theta,cos_phi,sin_phi
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: gravity_term
- ! for the Euler scheme for rotation
- real(kind=CUSTOM_REAL) :: two_omega_deltat,cos_two_omega_t,sin_two_omega_t,A_rotation,B_rotation, &
- ux_rotation,uy_rotation,dpotentialdx_with_rot,dpotentialdy_with_rot
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: source_euler_A,source_euler_B
-
- integer :: ispec,iglob
- integer :: i,j,k
- real(kind=CUSTOM_REAL) :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
- real(kind=CUSTOM_REAL) :: dpotentialdxl,dpotentialdyl,dpotentialdzl
- real(kind=CUSTOM_REAL) :: sum_terms
-
- ! manually inline the calls to the Deville et al. (2002) routines
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,m2) :: B1_m1_m2_5points
- real(kind=CUSTOM_REAL), dimension(m1,m2) :: C1_m1_m2_5points
- real(kind=CUSTOM_REAL), dimension(m1,m2) :: E1_m1_m2_5points
-
- equivalence(dummyx_loc,B1_m1_m2_5points)
- equivalence(tempx1,C1_m1_m2_5points)
- equivalence(newtempx1,E1_m1_m2_5points)
-
- real(kind=CUSTOM_REAL), dimension(m2,NGLLX) :: A1_mxm_m2_m1_5points
- real(kind=CUSTOM_REAL), dimension(m2,m1) :: C1_mxm_m2_m1_5points
- real(kind=CUSTOM_REAL), dimension(m2,m1) :: E1_mxm_m2_m1_5points
-
- equivalence(dummyx_loc,A1_mxm_m2_m1_5points)
- equivalence(tempx3,C1_mxm_m2_m1_5points)
- equivalence(newtempx3,E1_mxm_m2_m1_5points)
-
- double precision, dimension(NGLLX,NGLLY,NGLLZ) :: temp_gxl,temp_gyl,temp_gzl
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
- displ_times_grad_x_ln_rho,displ_times_grad_y_ln_rho,displ_times_grad_z_ln_rho
-
-! integer :: computed_elements
- integer :: num_elements,ispec_p
- integer :: iphase
-
-! ****************************************************
-! big loop over all spectral elements in the fluid
-! ****************************************************
-
- if (NSPEC_OUTER_CORE_ADJOINT /= 1 .and. (.not. phase_is_inner) ) div_displfluid(:,:,:,:) = 0._CUSTOM_REAL
-
-! computed_elements = 0
- if( .not. phase_is_inner ) then
- iphase = 1
- num_elements = nspec_outer
- else
- iphase = 2
- num_elements = nspec_inner
- endif
-
- do ispec_p = 1,num_elements
-
- ispec = phase_ispec_inner(ispec_p,iphase)
-
- ! only compute element which belong to current phase (inner or outer elements)
-
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- iglob = ibool(i,j,k,ispec)
-
- ! stores "displacement"
- dummyx_loc(i,j,k) = displfluid(iglob)
-
- ! pre-computes factors
- ! use mesh coordinates to get theta and phi
- ! x y z contain r theta phi
- radius = dble(xstore(iglob))
- theta = dble(ystore(iglob))
- phi = dble(zstore(iglob))
-
- cos_theta = dcos(theta)
- sin_theta = dsin(theta)
- cos_phi = dcos(phi)
- sin_phi = dsin(phi)
-
- int_radius = nint(radius * R_EARTH_KM * 10.d0)
-
- if( .not. GRAVITY_VAL ) then
- ! grad(rho)/rho in Cartesian components
- displ_times_grad_x_ln_rho(i,j,k) = dummyx_loc(i,j,k) &
- * sngl(sin_theta * cos_phi * d_ln_density_dr_table(int_radius))
- displ_times_grad_y_ln_rho(i,j,k) = dummyx_loc(i,j,k) &
- * sngl(sin_theta * sin_phi * d_ln_density_dr_table(int_radius))
- displ_times_grad_z_ln_rho(i,j,k) = dummyx_loc(i,j,k) &
- * sngl(cos_theta * d_ln_density_dr_table(int_radius))
- else
- ! Cartesian components of the gravitational acceleration
- ! integrate and multiply by rho / Kappa
- temp_gxl(i,j,k) = sin_theta*cos_phi
- temp_gyl(i,j,k) = sin_theta*sin_phi
- temp_gzl(i,j,k) = cos_theta
- endif
-
- enddo
- enddo
- enddo
-
- ! subroutines adapted from Deville, Fischer and Mund, High-order methods
- ! for incompressible fluid flow, Cambridge University Press (2002),
- ! pages 386 and 389 and Figure 8.3.1
- do j=1,m2
- do i=1,m1
- C1_m1_m2_5points(i,j) = hprime_xx(i,1)*B1_m1_m2_5points(1,j) + &
- hprime_xx(i,2)*B1_m1_m2_5points(2,j) + &
- hprime_xx(i,3)*B1_m1_m2_5points(3,j) + &
- hprime_xx(i,4)*B1_m1_m2_5points(4,j) + &
- hprime_xx(i,5)*B1_m1_m2_5points(5,j)
- enddo
- enddo
-
- do k = 1,NGLLX
- do j=1,m1
- do i=1,m1
- tempx2(i,j,k) = dummyx_loc(i,1,k)*hprime_xxT(1,j) + &
- dummyx_loc(i,2,k)*hprime_xxT(2,j) + &
- dummyx_loc(i,3,k)*hprime_xxT(3,j) + &
- dummyx_loc(i,4,k)*hprime_xxT(4,j) + &
- dummyx_loc(i,5,k)*hprime_xxT(5,j)
- enddo
- enddo
- enddo
-
- do j=1,m1
- do i=1,m2
- C1_mxm_m2_m1_5points(i,j) = A1_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
- A1_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
- A1_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
- A1_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
- A1_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
- enddo
- enddo
-
-
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
-
- ! get derivatives of velocity potential with respect to x, y and z
- xixl = xix(i,j,k,ispec)
- xiyl = xiy(i,j,k,ispec)
- xizl = xiz(i,j,k,ispec)
- etaxl = etax(i,j,k,ispec)
- etayl = etay(i,j,k,ispec)
- etazl = etaz(i,j,k,ispec)
- gammaxl = gammax(i,j,k,ispec)
- gammayl = gammay(i,j,k,ispec)
- gammazl = gammaz(i,j,k,ispec)
-
- ! compute the jacobian
- jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
- - xiyl*(etaxl*gammazl-etazl*gammaxl) &
- + xizl*(etaxl*gammayl-etayl*gammaxl))
-
- dpotentialdxl = xixl*tempx1(i,j,k) + etaxl*tempx2(i,j,k) + gammaxl*tempx3(i,j,k)
- dpotentialdyl = xiyl*tempx1(i,j,k) + etayl*tempx2(i,j,k) + gammayl*tempx3(i,j,k)
- dpotentialdzl = xizl*tempx1(i,j,k) + etazl*tempx2(i,j,k) + gammazl*tempx3(i,j,k)
-
- ! compute contribution of rotation and add to gradient of potential
- ! this term has no Z component
- if(ROTATION_VAL) then
-
- ! store the source for the Euler scheme for A_rotation and B_rotation
- two_omega_deltat = deltat * two_omega_earth
-
- cos_two_omega_t = cos(two_omega_earth*time)
- sin_two_omega_t = sin(two_omega_earth*time)
-
- ! time step deltat of Euler scheme is included in the source
- source_euler_A(i,j,k) = two_omega_deltat &
- * (cos_two_omega_t * dpotentialdyl + sin_two_omega_t * dpotentialdxl)
- source_euler_B(i,j,k) = two_omega_deltat &
- * (sin_two_omega_t * dpotentialdyl - cos_two_omega_t * dpotentialdxl)
-
- A_rotation = A_array_rotation(i,j,k,ispec)
- B_rotation = B_array_rotation(i,j,k,ispec)
-
- ux_rotation = A_rotation*cos_two_omega_t + B_rotation*sin_two_omega_t
- uy_rotation = - A_rotation*sin_two_omega_t + B_rotation*cos_two_omega_t
-
- dpotentialdx_with_rot = dpotentialdxl + ux_rotation
- dpotentialdy_with_rot = dpotentialdyl + uy_rotation
-
- else
-
- dpotentialdx_with_rot = dpotentialdxl
- dpotentialdy_with_rot = dpotentialdyl
-
- endif ! end of section with rotation
-
- ! add (chi/rho)grad(rho) term in no gravity case
- if(.not. GRAVITY_VAL) then
-
- ! With regards to the non-gravitating case: we cannot set N^2 = 0 *and* let g = 0.
- ! We can *either* assume N^2 = 0 but keep gravity g, *or* we can assume that gravity
- ! is negligible to begin with, as in our GJI 2002a, in which case N does not arise.
- ! We get:
- !
- ! \ddot\chi = \rho^{-1}\kappa\bdel\cdot(\bdel\chi+\chi\bdel\ln\rho)
- !
- ! Then the displacement is
- !
- ! \bu = \bdel\chi+\chi\bdel\ln\rho = \rho^{-1}\bdel(\rho\chi)
- !
- ! and the pressure is
- !
- ! p = -\rho\ddot{\chi}
- !
- ! Thus in our 2002b GJI paper eqn (21) is wrong, and equation (41)
- ! in our AGU monograph is incorrect; these equations should be replaced by
- !
- ! \ddot\chi = \rho^{-1}\kappa\bdel\cdot(\bdel\chi+\chi\bdel\ln\rho)
- !
- ! Note that the fluid potential we use in GJI 2002a differs from the one used here:
- !
- ! \chi_GJI2002a = \rho\partial\t\chi
- !
- ! such that
- !
- ! \bv = \partial_t\bu=\rho^{-1}\bdel\chi_GJI2002a (GJI 2002a eqn 20)
- !
- ! p = - \partial_t\chi_GJI2002a (GJI 2002a eqn 19)
-
- ! use mesh coordinates to get theta and phi
- ! x y z contain r theta phi
- dpotentialdx_with_rot = dpotentialdx_with_rot + displ_times_grad_x_ln_rho(i,j,k)
- dpotentialdy_with_rot = dpotentialdy_with_rot + displ_times_grad_y_ln_rho(i,j,k)
- dpotentialdzl = dpotentialdzl + displ_times_grad_z_ln_rho(i,j,k)
-
- else ! if gravity is turned on
-
- ! compute divergence of displacment
- gxl = temp_gxl(i,j,k)
- gyl = temp_gyl(i,j,k)
- gzl = temp_gzl(i,j,k)
-
- ! distinguish between single and double precision for reals
- if(CUSTOM_REAL == SIZE_REAL) then
- gravity_term(i,j,k) = &
- sngl( minus_rho_g_over_kappa_fluid(int_radius) &
- * dble(jacobianl) * wgll_cube(i,j,k) &
- * (dble(dpotentialdx_with_rot) * gxl &
- + dble(dpotentialdy_with_rot) * gyl &
- + dble(dpotentialdzl) * gzl) )
- else
- gravity_term(i,j,k) = minus_rho_g_over_kappa_fluid(int_radius) * &
- jacobianl * wgll_cube(i,j,k) &
- * (dpotentialdx_with_rot * gxl &
- + dpotentialdy_with_rot * gyl &
- + dpotentialdzl * gzl)
- endif
-
- ! divergence of displacement field with gravity on
- ! note: these calculations are only considered for SIMULATION_TYPE == 1 .and. SAVE_FORWARD
- ! and one has set MOVIE_VOLUME_TYPE == 4 when MOVIE_VOLUME is .true.;
- ! in case of SIMULATION_TYPE == 3, it gets overwritten by compute_kernels_outer_core()
- if (NSPEC_OUTER_CORE_ADJOINT /= 1 .and. MOVIE_VOLUME) then
- div_displfluid(i,j,k,ispec) = &
- minus_rho_g_over_kappa_fluid(int_radius) &
- * (dpotentialdx_with_rot * gxl &
- + dpotentialdy_with_rot * gyl &
- + dpotentialdzl * gzl)
- endif
-
- endif
-
- tempx1(i,j,k) = jacobianl*(xixl*dpotentialdx_with_rot &
- + xiyl*dpotentialdy_with_rot + xizl*dpotentialdzl)
- tempx2(i,j,k) = jacobianl*(etaxl*dpotentialdx_with_rot &
- + etayl*dpotentialdy_with_rot + etazl*dpotentialdzl)
- tempx3(i,j,k) = jacobianl*(gammaxl*dpotentialdx_with_rot &
- + gammayl*dpotentialdy_with_rot + gammazl*dpotentialdzl)
-
- enddo
- enddo
- enddo
-
- ! subroutines adapted from Deville, Fischer and Mund, High-order methods
- ! for incompressible fluid flow, Cambridge University Press (2002),
- ! pages 386 and 389 and Figure 8.3.1
- do j=1,m2
- do i=1,m1
- E1_m1_m2_5points(i,j) = hprimewgll_xxT(i,1)*C1_m1_m2_5points(1,j) + &
- hprimewgll_xxT(i,2)*C1_m1_m2_5points(2,j) + &
- hprimewgll_xxT(i,3)*C1_m1_m2_5points(3,j) + &
- hprimewgll_xxT(i,4)*C1_m1_m2_5points(4,j) + &
- hprimewgll_xxT(i,5)*C1_m1_m2_5points(5,j)
- enddo
- enddo
-
- do k = 1,NGLLX
- do j=1,m1
- do i=1,m1
- newtempx2(i,j,k) = tempx2(i,1,k)*hprimewgll_xx(1,j) + &
- tempx2(i,2,k)*hprimewgll_xx(2,j) + &
- tempx2(i,3,k)*hprimewgll_xx(3,j) + &
- tempx2(i,4,k)*hprimewgll_xx(4,j) + &
- tempx2(i,5,k)*hprimewgll_xx(5,j)
- enddo
- enddo
- enddo
-
- do j=1,m1
- do i=1,m2
- E1_mxm_m2_m1_5points(i,j) = C1_mxm_m2_m1_5points(i,1)*hprimewgll_xx(1,j) + &
- C1_mxm_m2_m1_5points(i,2)*hprimewgll_xx(2,j) + &
- C1_mxm_m2_m1_5points(i,3)*hprimewgll_xx(3,j) + &
- C1_mxm_m2_m1_5points(i,4)*hprimewgll_xx(4,j) + &
- C1_mxm_m2_m1_5points(i,5)*hprimewgll_xx(5,j)
- enddo
- enddo
-
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
-
- ! sum contributions from each element to the global mesh and add gravity term
- sum_terms = - (wgllwgll_yz(j,k)*newtempx1(i,j,k) &
- + wgllwgll_xz(i,k)*newtempx2(i,j,k) &
- + wgllwgll_xy(i,j)*newtempx3(i,j,k))
-
- if(GRAVITY_VAL) sum_terms = sum_terms + gravity_term(i,j,k)
-
- iglob = ibool(i,j,k,ispec)
- accelfluid(iglob) = accelfluid(iglob) + sum_terms
-
- enddo
- enddo
- enddo
-
- ! update rotation term with Euler scheme
- if(ROTATION_VAL) then
- ! use the source saved above
- A_array_rotation(:,:,:,ispec) = A_array_rotation(:,:,:,ispec) + source_euler_A(:,:,:)
- B_array_rotation(:,:,:,ispec) = B_array_rotation(:,:,:,ispec) + source_euler_B(:,:,:)
- endif
-
- enddo ! spectral element loop
-
- end subroutine compute_forces_outer_core_Dev
-
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90 2013-08-30 12:45:44 UTC (rev 22746)
@@ -25,12 +25,15 @@
!
!=====================================================================
+
subroutine compute_forces_viscoelastic()
+! elastic domains for forward or adjoint simulations (SIMULATION_TYPE == 1 or 2 )
+
use specfem_par
use specfem_par_crustmantle
use specfem_par_innercore
- use specfem_par_outercore,only: accel_outer_core,b_accel_outer_core, &
+ use specfem_par_outercore,only: accel_outer_core, &
normal_top_outer_core,jacobian2D_top_outer_core, &
normal_bottom_outer_core,jacobian2D_bottom_outer_core, &
ibelm_top_outer_core,ibelm_bottom_outer_core, &
@@ -45,41 +48,9 @@
integer :: iphase
logical :: phase_is_inner
+ ! checks
+ if( SIMULATION_TYPE == 3 ) return
-!daniel debug: att - debug
-! integer :: iglob
-! logical,parameter :: DEBUG = .false.
-! if( DEBUG ) then
-! iglob = ibool_crust_mantle(1,1,1,100)
-! if( SIMULATION_TYPE == 1) then
-! if( it == NSTEP .and. myrank == 0 ) then
-! print*,'last step',it,'Rxx:',R_xx_crust_mantle(1,1,1,1,100),epsilondev_xx_crust_mantle(1,1,1,100), &
-! displ_crust_mantle(1,iglob),displ_crust_mantle(2,iglob),displ_crust_mantle(3,iglob)
-! endif
-! if( it == NSTEP-1 .and. myrank == 0 ) then
-! print*,'second last step',it,'Rxx:',R_xx_crust_mantle(1,1,1,1,100),epsilondev_xx_crust_mantle(1,1,1,100), &
-! displ_crust_mantle(1,iglob),displ_crust_mantle(2,iglob),displ_crust_mantle(3,iglob)
-! endif
-! if( it == NSTEP-2 .and. myrank == 0 ) then
-! print*,'third last step',it,'Rxx:',R_xx_crust_mantle(1,1,1,1,100),epsilondev_xx_crust_mantle(1,1,1,100), &
-! displ_crust_mantle(1,iglob),displ_crust_mantle(2,iglob),displ_crust_mantle(3,iglob)
-! endif
-! else if( SIMULATION_TYPE == 3 ) then
-! if( it == 1 .and. myrank == 0 ) then
-! print*,'first step',it,'Rxx:',b_R_xx_crust_mantle(1,1,1,1,100),b_epsilondev_xx_crust_mantle(1,1,1,100), &
-! b_displ_crust_mantle(1,iglob),b_displ_crust_mantle(2,iglob),b_displ_crust_mantle(3,iglob)
-! endif
-! if( it == 2 .and. myrank == 0 ) then
-! print*,'second step',it,'Rxx:',b_R_xx_crust_mantle(1,1,1,1,100),b_epsilondev_xx_crust_mantle(1,1,1,100), &
-! b_displ_crust_mantle(1,iglob),b_displ_crust_mantle(2,iglob),b_displ_crust_mantle(3,iglob)
-! endif
-! if( it == 3 .and. myrank == 0 ) then
-! print*,'third step',it,'Rxx:',b_R_xx_crust_mantle(1,1,1,1,100),b_epsilondev_xx_crust_mantle(1,1,1,100), &
-! b_displ_crust_mantle(1,iglob),b_displ_crust_mantle(2,iglob),b_displ_crust_mantle(3,iglob)
-! endif
-! endif
-! endif
-
! ****************************************************
! big loop over all spectral elements in the solid
! ****************************************************
@@ -102,12 +73,10 @@
phase_is_inner = .true.
endif
-
+ ! compute internal forces in the solid regions
+ ! note: for anisotropy and gravity, x y and z contain r theta and phi
if( .NOT. GPU_MODE ) then
! on CPU
-
- ! compute internal forces in the solid regions
- ! note: for anisotropy and gravity, x y and z contain r theta and phi
if( USE_DEVILLE_PRODUCTS_VAL ) then
! uses Deville (2002) optimizations
! crust/mantle region
@@ -123,7 +92,6 @@
eps_trace_over_3_crust_mantle, &
alphaval,betaval,gammaval, &
factor_common_crust_mantle,size(factor_common_crust_mantle,5), .false. )
-
! inner core region
call compute_forces_inner_core_Dev( NSPEC_INNER_CORE_STR_OR_ATT,NGLOB_INNER_CORE, &
NSPEC_INNER_CORE_ATTENUATION, &
@@ -136,7 +104,6 @@
eps_trace_over_3_inner_core,&
alphaval,betaval,gammaval, &
factor_common_inner_core,size(factor_common_inner_core,5), .false. )
-
else
! no Deville optimization
! crust/mantle region
@@ -166,72 +133,6 @@
factor_common_inner_core,size(factor_common_inner_core,5) )
endif
-
- ! adjoint / kernel runs
- if (SIMULATION_TYPE == 3 ) then
- if( USE_DEVILLE_PRODUCTS_VAL ) then
- ! uses Deville (2002) optimizations
- ! crust/mantle region
- call compute_forces_crust_mantle_Dev( NSPEC_CRUST_MANTLE_ADJOINT,NGLOB_CRUST_MANTLE_ADJOINT, &
- NSPEC_CRUST_MANTLE_STR_AND_ATT, &
- b_deltat, &
- b_displ_crust_mantle,b_veloc_crust_mantle,b_accel_crust_mantle, &
- phase_is_inner, &
- b_R_xx_crust_mantle,b_R_yy_crust_mantle,b_R_xy_crust_mantle, &
- b_R_xz_crust_mantle,b_R_yz_crust_mantle, &
- b_epsilondev_xx_crust_mantle,b_epsilondev_yy_crust_mantle,&
- b_epsilondev_xy_crust_mantle, &
- b_epsilondev_xz_crust_mantle,b_epsilondev_yz_crust_mantle, &
- b_eps_trace_over_3_crust_mantle, &
- b_alphaval,b_betaval,b_gammaval, &
- factor_common_crust_mantle,size(factor_common_crust_mantle,5), .true. )
- ! inner core region
- call compute_forces_inner_core_Dev( NSPEC_INNER_CORE_ADJOINT,NGLOB_INNER_CORE_ADJOINT, &
- NSPEC_INNER_CORE_STR_AND_ATT, &
- b_deltat, &
- b_displ_inner_core,b_veloc_inner_core,b_accel_inner_core, &
- phase_is_inner, &
- b_R_xx_inner_core,b_R_yy_inner_core,b_R_xy_inner_core, &
- b_R_xz_inner_core,b_R_yz_inner_core, &
- b_epsilondev_xx_inner_core,b_epsilondev_yy_inner_core,b_epsilondev_xy_inner_core, &
- b_epsilondev_xz_inner_core,b_epsilondev_yz_inner_core, &
- b_eps_trace_over_3_inner_core,&
- b_alphaval,b_betaval,b_gammaval, &
- factor_common_inner_core,size(factor_common_inner_core,5), .true. )
-
- else
- ! no Deville optimization
- ! crust/mantle region
- call compute_forces_crust_mantle( NSPEC_CRUST_MANTLE_ADJOINT,NGLOB_CRUST_MANTLE_ADJOINT, &
- NSPEC_CRUST_MANTLE_STR_AND_ATT, &
- b_deltat, &
- b_displ_crust_mantle,b_veloc_crust_mantle,b_accel_crust_mantle, &
- phase_is_inner, &
- b_R_xx_crust_mantle,b_R_yy_crust_mantle,b_R_xy_crust_mantle, &
- b_R_xz_crust_mantle,b_R_yz_crust_mantle, &
- b_epsilondev_xx_crust_mantle,b_epsilondev_yy_crust_mantle,&
- b_epsilondev_xy_crust_mantle, &
- b_epsilondev_xz_crust_mantle,b_epsilondev_yz_crust_mantle, &
- b_eps_trace_over_3_crust_mantle, &
- b_alphaval,b_betaval,b_gammaval, &
- factor_common_crust_mantle,size(factor_common_crust_mantle,5) )
-
- ! inner core region
- call compute_forces_inner_core( NSPEC_INNER_CORE_ADJOINT,NGLOB_INNER_CORE_ADJOINT, &
- NSPEC_INNER_CORE_STR_AND_ATT, &
- b_deltat, &
- b_displ_inner_core,b_veloc_inner_core,b_accel_inner_core, &
- phase_is_inner, &
- b_R_xx_inner_core,b_R_yy_inner_core,b_R_xy_inner_core, &
- b_R_xz_inner_core,b_R_yz_inner_core, &
- b_epsilondev_xx_inner_core,b_epsilondev_yy_inner_core,b_epsilondev_xy_inner_core, &
- b_epsilondev_xz_inner_core,b_epsilondev_yz_inner_core, &
- b_eps_trace_over_3_inner_core,&
- b_alphaval,b_betaval,b_gammaval, &
- factor_common_inner_core,size(factor_common_inner_core,5) )
- endif
- endif !SIMULATION_TYPE == 3
-
else
! on GPU
! contains both forward SIM_TYPE==1 and backward SIM_TYPE==3 simulations
@@ -246,12 +147,12 @@
! absorbing boundaries
! Stacey
- if(NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS) call compute_stacey_crust_mantle()
+ if(NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS) call compute_stacey_crust_mantle_forward()
! add the sources
! add adjoint sources
- if (SIMULATION_TYPE == 2 .or. SIMULATION_TYPE == 3) then
+ if (SIMULATION_TYPE == 2 ) then
if( nadj_rec_local > 0 ) call compute_add_sources_adjoint()
endif
@@ -262,9 +163,6 @@
! adds sources for forward simulation
if (SIMULATION_TYPE == 1 .and. nsources_local > 0) &
call compute_add_sources()
- ! add sources for backward/reconstructed wavefield
- if (SIMULATION_TYPE == 3 .and. nsources_local > 0) &
- call compute_add_sources_backward()
case( 1 )
! the first step of noise tomography is to use |S(\omega)|^2 as a point force source at one of the receivers.
@@ -282,14 +180,6 @@
! note the ensemble forward sources are generally distributed on the surface of the earth
! that's to say, the ensemble forward source is kind of a surface force density, not a body force density
! therefore, we must add it here, before applying the inverse of mass matrix
-
- case( 3 )
- ! third step of noise tomography, i.e., read the surface movie saved at every timestep
- ! use the movie to reconstruct the ensemble forward wavefield
- ! the ensemble adjoint wavefield is done as usual
- ! note instead of "NSTEP-it+1", now we us "it", since reconstruction is a reversal of reversal
- call noise_read_add_surface_movie(NGLOB_CRUST_MANTLE_ADJOINT,b_accel_crust_mantle,it)
-
end select
! ****************************************************
@@ -302,27 +192,25 @@
!--- couple with outer core at the bottom of the mantle
!---
if(ACTUALLY_COUPLE_FLUID_CMB) &
- call compute_coupling_CMB_fluid(displ_crust_mantle,b_displ_crust_mantle, &
- accel_crust_mantle,b_accel_crust_mantle, &
- ibool_crust_mantle,ibelm_bottom_crust_mantle, &
- accel_outer_core,b_accel_outer_core, &
- normal_top_outer_core,jacobian2D_top_outer_core, &
- wgllwgll_xy,ibool_outer_core,ibelm_top_outer_core, &
- RHO_TOP_OC,minus_g_cmb, &
- SIMULATION_TYPE,NSPEC2D_BOTTOM(IREGION_CRUST_MANTLE))
+ call compute_coupling_CMB_fluid(displ_crust_mantle,accel_crust_mantle, &
+ ibool_crust_mantle,ibelm_bottom_crust_mantle, &
+ accel_outer_core, &
+ normal_top_outer_core,jacobian2D_top_outer_core, &
+ wgllwgll_xy,ibool_outer_core,ibelm_top_outer_core, &
+ RHO_TOP_OC,minus_g_cmb, &
+ NSPEC2D_BOTTOM(IREGION_CRUST_MANTLE))
!---
!--- couple with outer core at the top of the inner core
!---
if(ACTUALLY_COUPLE_FLUID_ICB) &
- call compute_coupling_ICB_fluid(displ_inner_core,b_displ_inner_core, &
- accel_inner_core,b_accel_inner_core, &
- ibool_inner_core,ibelm_top_inner_core, &
- accel_outer_core,b_accel_outer_core, &
- normal_bottom_outer_core,jacobian2D_bottom_outer_core, &
- wgllwgll_xy,ibool_outer_core,ibelm_bottom_outer_core, &
- RHO_BOTTOM_OC,minus_g_icb, &
- SIMULATION_TYPE,NSPEC2D_TOP(IREGION_INNER_CORE))
+ call compute_coupling_ICB_fluid(displ_inner_core,accel_inner_core, &
+ ibool_inner_core,ibelm_top_inner_core, &
+ accel_outer_core, &
+ normal_bottom_outer_core,jacobian2D_bottom_outer_core, &
+ wgllwgll_xy,ibool_outer_core,ibelm_bottom_outer_core, &
+ RHO_BOTTOM_OC,minus_g_icb, &
+ NSPEC2D_TOP(IREGION_INNER_CORE))
else
! on GPU
@@ -330,12 +218,12 @@
!--- couple with outer core at the bottom of the mantle
!---
if( ACTUALLY_COUPLE_FLUID_CMB ) &
- call compute_coupling_cmb_fluid_cuda(Mesh_pointer)
+ call compute_coupling_cmb_fluid_cuda(Mesh_pointer,1)
!---
!--- couple with outer core at the top of the inner core
!---
if( ACTUALLY_COUPLE_FLUID_ICB ) &
- call compute_coupling_icb_fluid_cuda(Mesh_pointer)
+ call compute_coupling_icb_fluid_cuda(Mesh_pointer,1)
endif
endif ! iphase == 1
@@ -387,51 +275,6 @@
IREGION_INNER_CORE, &
1)
endif ! GPU_MODE
-
- ! adjoint / kernel runs
- if (SIMULATION_TYPE == 3) then
- if(.NOT. GPU_MODE) then
- ! on CPU
- ! sends accel values to corresponding MPI interface neighbors
- ! crust mantle
- call assemble_MPI_vector_s(NPROCTOT_VAL,NGLOB_CRUST_MANTLE, &
- b_accel_crust_mantle, &
- b_buffer_send_vector_cm,b_buffer_recv_vector_cm, &
- num_interfaces_crust_mantle,max_nibool_interfaces_cm, &
- nibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle,&
- my_neighbours_crust_mantle, &
- b_request_send_vector_cm,b_request_recv_vector_cm)
- ! inner core
- call assemble_MPI_vector_s(NPROCTOT_VAL,NGLOB_INNER_CORE, &
- b_accel_inner_core, &
- b_buffer_send_vector_inner_core,b_buffer_recv_vector_inner_core, &
- num_interfaces_inner_core,max_nibool_interfaces_ic, &
- nibool_interfaces_inner_core,ibool_interfaces_inner_core,&
- my_neighbours_inner_core, &
- b_request_send_vector_ic,b_request_recv_vector_ic)
- else
- ! on GPU
- ! crust mantle
- call assemble_MPI_vector_send_cuda(Mesh_pointer,NPROCTOT_VAL, &
- b_buffer_send_vector_cm,b_buffer_recv_vector_cm, &
- num_interfaces_crust_mantle,max_nibool_interfaces_cm, &
- nibool_interfaces_crust_mantle,&
- my_neighbours_crust_mantle, &
- b_request_send_vector_cm,b_request_recv_vector_cm, &
- IREGION_CRUST_MANTLE, &
- 3) ! <-- 3 == adjoint b_accel
- ! inner core
- call assemble_MPI_vector_send_cuda(Mesh_pointer,NPROCTOT_VAL, &
- b_buffer_send_vector_inner_core,b_buffer_recv_vector_inner_core, &
- num_interfaces_inner_core,max_nibool_interfaces_ic, &
- nibool_interfaces_inner_core,&
- my_neighbours_inner_core, &
- b_request_send_vector_ic,b_request_recv_vector_ic, &
- IREGION_INNER_CORE, &
- 3)
- endif ! GPU
- endif ! SIMULATION_TYPE == 3
-
else
! waits for send/receive requests to be completed and assembles values
if(.NOT. GPU_MODE) then
@@ -467,46 +310,6 @@
IREGION_INNER_CORE, &
1)
endif
-
-
- ! adjoint / kernel runs
- if (SIMULATION_TYPE == 3) then
- ! waits for send/receive requests to be completed and assembles values
- if(.NOT. GPU_MODE) then
- ! on CPU
- ! crust mantle
- call assemble_MPI_vector_w(NPROCTOT_VAL,NGLOB_CRUST_MANTLE, &
- b_accel_crust_mantle, &
- b_buffer_recv_vector_cm,num_interfaces_crust_mantle,&
- max_nibool_interfaces_cm, &
- nibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle, &
- b_request_send_vector_cm,b_request_recv_vector_cm)
- ! inner core
- call assemble_MPI_vector_w(NPROCTOT_VAL,NGLOB_INNER_CORE, &
- b_accel_inner_core, &
- b_buffer_recv_vector_inner_core,num_interfaces_inner_core,&
- max_nibool_interfaces_ic, &
- nibool_interfaces_inner_core,ibool_interfaces_inner_core, &
- b_request_send_vector_ic,b_request_recv_vector_ic)
-
- else
- ! on GPU
- ! crust mantle
- call assemble_MPI_vector_write_cuda(Mesh_pointer,NPROCTOT_VAL, &
- b_buffer_recv_vector_cm, &
- num_interfaces_crust_mantle,max_nibool_interfaces_cm, &
- b_request_send_vector_cm,b_request_recv_vector_cm, &
- IREGION_CRUST_MANTLE, &
- 3) ! <-- 3 == adjoint b_accel
- ! inner core
- call assemble_MPI_vector_write_cuda(Mesh_pointer,NPROCTOT_VAL,&
- b_buffer_recv_vector_inner_core, &
- num_interfaces_inner_core,max_nibool_interfaces_ic, &
- b_request_send_vector_ic,b_request_recv_vector_ic, &
- IREGION_INNER_CORE, &
- 3)
- endif
- endif ! SIMULATION_TYPE == 3
endif ! iphase == 1
enddo ! iphase
@@ -515,19 +318,11 @@
if(.NOT. GPU_MODE) then
! on CPU
call update_accel_elastic(NGLOB_CRUST_MANTLE,NGLOB_XY_CM,veloc_crust_mantle,accel_crust_mantle, &
- two_omega_earth, &
- rmassx_crust_mantle,rmassy_crust_mantle,rmassz_crust_mantle, &
- NCHUNKS_VAL,ABSORBING_CONDITIONS)
- ! adjoint / kernel runs
- if (SIMULATION_TYPE == 3) &
- call update_accel_elastic(NGLOB_CRUST_MANTLE_ADJOINT,NGLOB_XY_CM,b_veloc_crust_mantle,b_accel_crust_mantle, &
- b_two_omega_earth, &
- rmassx_crust_mantle,rmassy_crust_mantle,rmassz_crust_mantle, &
- NCHUNKS_VAL,ABSORBING_CONDITIONS)
+ two_omega_earth,rmassx_crust_mantle,rmassy_crust_mantle,rmassz_crust_mantle)
else
! on GPU
- call kernel_3_a_cuda(Mesh_pointer, &
- deltatover2,SIMULATION_TYPE,b_deltatover2,NCHUNKS_VAL)
+ ! includes FORWARD_OR_ADJOINT == 1
+ call update_accel_3_a_cuda(Mesh_pointer,deltatover2,NCHUNKS_VAL,1)
endif
! couples ocean with crust mantle
@@ -540,9 +335,7 @@
rmass_ocean_load,normal_top_crust_mantle, &
ibool_crust_mantle,ibelm_top_crust_mantle, &
updated_dof_ocean_load,NGLOB_XY_CM, &
- NSPEC2D_TOP(IREGION_CRUST_MANTLE), &
- ABSORBING_CONDITIONS,EXACT_MASS_MATRIX_FOR_ROTATION,USE_LDDRK)
-
+ NSPEC2D_TOP(IREGION_CRUST_MANTLE) )
else
! on GPU
call compute_coupling_ocean_cuda(Mesh_pointer,NCHUNKS_VAL,EXACT_MASS_MATRIX_FOR_ROTATION,USE_LDDRK, &
@@ -558,15 +351,376 @@
call update_veloc_elastic(NGLOB_CRUST_MANTLE,veloc_crust_mantle,accel_crust_mantle, &
NGLOB_INNER_CORE,veloc_inner_core,accel_inner_core, &
deltatover2,two_omega_earth,rmass_inner_core)
+ else
+ ! on GPU
+ ! includes FORWARD_OR_ADJOINT == 1
+ call update_veloc_3_b_cuda(Mesh_pointer,deltatover2,1)
+ endif
+
+ end subroutine compute_forces_viscoelastic
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine compute_forces_viscoelastic_backward()
+
+! backward/reconstructed wavefields only
+
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore,only: b_accel_outer_core, &
+ normal_top_outer_core,jacobian2D_top_outer_core, &
+ normal_bottom_outer_core,jacobian2D_bottom_outer_core, &
+ ibelm_top_outer_core,ibelm_bottom_outer_core, &
+ ibool_outer_core
+ use specfem_par_movie
+ implicit none
+
+ ! local parameters
+ ! non blocking MPI
+ ! iphase: iphase = 1 is for computing outer elements in the crust_mantle and inner_core regions,
+ ! iphase = 2 is for computing inner elements (former icall parameter)
+ integer :: iphase
+ logical :: phase_is_inner
+
+ ! checks
+ if( SIMULATION_TYPE /= 3 ) return
+
+!daniel debug: att - debug
+! integer :: iglob
+! logical,parameter :: DEBUG = .false.
+! if( DEBUG ) then
+! iglob = ibool_crust_mantle(1,1,1,100)
+! if( SIMULATION_TYPE == 1) then
+! if( it == NSTEP .and. myrank == 0 ) then
+! print*,'last step',it,'Rxx:',R_xx_crust_mantle(1,1,1,1,100),epsilondev_xx_crust_mantle(1,1,1,100), &
+! displ_crust_mantle(1,iglob),displ_crust_mantle(2,iglob),displ_crust_mantle(3,iglob)
+! endif
+! if( it == NSTEP-1 .and. myrank == 0 ) then
+! print*,'second last step',it,'Rxx:',R_xx_crust_mantle(1,1,1,1,100),epsilondev_xx_crust_mantle(1,1,1,100), &
+! displ_crust_mantle(1,iglob),displ_crust_mantle(2,iglob),displ_crust_mantle(3,iglob)
+! endif
+! if( it == NSTEP-2 .and. myrank == 0 ) then
+! print*,'third last step',it,'Rxx:',R_xx_crust_mantle(1,1,1,1,100),epsilondev_xx_crust_mantle(1,1,1,100), &
+! displ_crust_mantle(1,iglob),displ_crust_mantle(2,iglob),displ_crust_mantle(3,iglob)
+! endif
+! else if( SIMULATION_TYPE == 3 ) then
+! if( it == 1 .and. myrank == 0 ) then
+! print*,'first step',it,'Rxx:',b_R_xx_crust_mantle(1,1,1,1,100),b_epsilondev_xx_crust_mantle(1,1,1,100), &
+! b_displ_crust_mantle(1,iglob),b_displ_crust_mantle(2,iglob),b_displ_crust_mantle(3,iglob)
+! endif
+! if( it == 2 .and. myrank == 0 ) then
+! print*,'second step',it,'Rxx:',b_R_xx_crust_mantle(1,1,1,1,100),b_epsilondev_xx_crust_mantle(1,1,1,100), &
+! b_displ_crust_mantle(1,iglob),b_displ_crust_mantle(2,iglob),b_displ_crust_mantle(3,iglob)
+! endif
+! if( it == 3 .and. myrank == 0 ) then
+! print*,'third step',it,'Rxx:',b_R_xx_crust_mantle(1,1,1,1,100),b_epsilondev_xx_crust_mantle(1,1,1,100), &
+! b_displ_crust_mantle(1,iglob),b_displ_crust_mantle(2,iglob),b_displ_crust_mantle(3,iglob)
+! endif
+! endif
+! endif
+
+ ! ****************************************************
+ ! big loop over all spectral elements in the solid
+ ! ****************************************************
+
+ ! compute internal forces in the solid regions
+
+ ! for anisotropy and gravity, x y and z contain r theta and phi
+
+ ! distinguishes two runs: for elements on MPI interfaces, and elements within the partitions
+ do iphase = 1,2
+
+ ! first, iphase == 1 for points on MPI interfaces (thus outer elements)
+ ! second, iphase == 2 for points purely inside partition (thus inner elements)
+ !
+ ! compute all the outer elements first, then sends out non blocking MPI communication
+ ! and continues computing inner elements (overlapping)
+ if( iphase == 1 ) then
+ phase_is_inner = .false.
+ else
+ phase_is_inner = .true.
+ endif
+
+ ! compute internal forces in the solid regions
+ ! note: for anisotropy and gravity, x y and z contain r theta and phi
+ if( .NOT. GPU_MODE ) then
+ ! on CPU
+ ! adjoint / kernel runs
+ if( USE_DEVILLE_PRODUCTS_VAL ) then
+ ! uses Deville (2002) optimizations
+ ! crust/mantle region
+ call compute_forces_crust_mantle_Dev( NSPEC_CRUST_MANTLE_ADJOINT,NGLOB_CRUST_MANTLE_ADJOINT, &
+ NSPEC_CRUST_MANTLE_STR_AND_ATT, &
+ b_deltat, &
+ b_displ_crust_mantle,b_veloc_crust_mantle,b_accel_crust_mantle, &
+ phase_is_inner, &
+ b_R_xx_crust_mantle,b_R_yy_crust_mantle,b_R_xy_crust_mantle, &
+ b_R_xz_crust_mantle,b_R_yz_crust_mantle, &
+ b_epsilondev_xx_crust_mantle,b_epsilondev_yy_crust_mantle,&
+ b_epsilondev_xy_crust_mantle, &
+ b_epsilondev_xz_crust_mantle,b_epsilondev_yz_crust_mantle, &
+ b_eps_trace_over_3_crust_mantle, &
+ b_alphaval,b_betaval,b_gammaval, &
+ factor_common_crust_mantle,size(factor_common_crust_mantle,5), .true. )
+ ! inner core region
+ call compute_forces_inner_core_Dev( NSPEC_INNER_CORE_ADJOINT,NGLOB_INNER_CORE_ADJOINT, &
+ NSPEC_INNER_CORE_STR_AND_ATT, &
+ b_deltat, &
+ b_displ_inner_core,b_veloc_inner_core,b_accel_inner_core, &
+ phase_is_inner, &
+ b_R_xx_inner_core,b_R_yy_inner_core,b_R_xy_inner_core, &
+ b_R_xz_inner_core,b_R_yz_inner_core, &
+ b_epsilondev_xx_inner_core,b_epsilondev_yy_inner_core,b_epsilondev_xy_inner_core, &
+ b_epsilondev_xz_inner_core,b_epsilondev_yz_inner_core, &
+ b_eps_trace_over_3_inner_core,&
+ b_alphaval,b_betaval,b_gammaval, &
+ factor_common_inner_core,size(factor_common_inner_core,5), .true. )
+
+ else
+ ! no Deville optimization
+ ! crust/mantle region
+ call compute_forces_crust_mantle( NSPEC_CRUST_MANTLE_ADJOINT,NGLOB_CRUST_MANTLE_ADJOINT, &
+ NSPEC_CRUST_MANTLE_STR_AND_ATT, &
+ b_deltat, &
+ b_displ_crust_mantle,b_veloc_crust_mantle,b_accel_crust_mantle, &
+ phase_is_inner, &
+ b_R_xx_crust_mantle,b_R_yy_crust_mantle,b_R_xy_crust_mantle, &
+ b_R_xz_crust_mantle,b_R_yz_crust_mantle, &
+ b_epsilondev_xx_crust_mantle,b_epsilondev_yy_crust_mantle,&
+ b_epsilondev_xy_crust_mantle, &
+ b_epsilondev_xz_crust_mantle,b_epsilondev_yz_crust_mantle, &
+ b_eps_trace_over_3_crust_mantle, &
+ b_alphaval,b_betaval,b_gammaval, &
+ factor_common_crust_mantle,size(factor_common_crust_mantle,5) )
+
+ ! inner core region
+ call compute_forces_inner_core( NSPEC_INNER_CORE_ADJOINT,NGLOB_INNER_CORE_ADJOINT, &
+ NSPEC_INNER_CORE_STR_AND_ATT, &
+ b_deltat, &
+ b_displ_inner_core,b_veloc_inner_core,b_accel_inner_core, &
+ phase_is_inner, &
+ b_R_xx_inner_core,b_R_yy_inner_core,b_R_xy_inner_core, &
+ b_R_xz_inner_core,b_R_yz_inner_core, &
+ b_epsilondev_xx_inner_core,b_epsilondev_yy_inner_core,b_epsilondev_xy_inner_core, &
+ b_epsilondev_xz_inner_core,b_epsilondev_yz_inner_core, &
+ b_eps_trace_over_3_inner_core,&
+ b_alphaval,b_betaval,b_gammaval, &
+ factor_common_inner_core,size(factor_common_inner_core,5) )
+ endif
+ else
+ ! on GPU
+ ! contains both forward SIM_TYPE==1 and backward SIM_TYPE==3 simulations
+ ! for crust/mantle
+ call compute_forces_crust_mantle_cuda(Mesh_pointer,iphase)
+ ! for inner core
+ call compute_forces_inner_core_cuda(Mesh_pointer,iphase)
+ endif ! GPU_MODE
+
+ ! computes additional contributions to acceleration field
+ if( iphase == 1 ) then
+
+ ! absorbing boundaries
+ ! Stacey
+ if(NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS) call compute_stacey_crust_mantle_backward()
+
+ ! add the sources
+ select case( NOISE_TOMOGRAPHY )
+ case( 0 )
+ ! add sources for backward/reconstructed wavefield
+ if( nsources_local > 0 ) &
+ call compute_add_sources_backward()
+
+ case( 3 )
+ ! third step of noise tomography, i.e., read the surface movie saved at every timestep
+ ! use the movie to reconstruct the ensemble forward wavefield
+ ! the ensemble adjoint wavefield is done as usual
+ ! note: instead of "NSTEP-it+1", we now use "it", since reconstruction is a reversal of reversal
+ call noise_read_add_surface_movie(NGLOB_CRUST_MANTLE_ADJOINT,b_accel_crust_mantle,it)
+
+ end select
+
+ ! ****************************************************
+ ! ********** add matching with fluid part **********
+ ! ****************************************************
+ ! only for elements in first matching layer in the solid
+ if( .not. GPU_MODE ) then
+ ! on CPU
+ !---
+ !--- couple with outer core at the bottom of the mantle
+ !---
+ if(ACTUALLY_COUPLE_FLUID_CMB) &
+ call compute_coupling_CMB_fluid(b_displ_crust_mantle,b_accel_crust_mantle, &
+ ibool_crust_mantle,ibelm_bottom_crust_mantle, &
+ b_accel_outer_core, &
+ normal_top_outer_core,jacobian2D_top_outer_core, &
+ wgllwgll_xy,ibool_outer_core,ibelm_top_outer_core, &
+ RHO_TOP_OC,minus_g_cmb, &
+ NSPEC2D_BOTTOM(IREGION_CRUST_MANTLE))
+
+ !---
+ !--- couple with outer core at the top of the inner core
+ !---
+ if(ACTUALLY_COUPLE_FLUID_ICB) &
+ call compute_coupling_ICB_fluid(b_displ_inner_core,b_accel_inner_core, &
+ ibool_inner_core,ibelm_top_inner_core, &
+ b_accel_outer_core, &
+ normal_bottom_outer_core,jacobian2D_bottom_outer_core, &
+ wgllwgll_xy,ibool_outer_core,ibelm_bottom_outer_core, &
+ RHO_BOTTOM_OC,minus_g_icb, &
+ NSPEC2D_TOP(IREGION_INNER_CORE))
+
+ else
+ ! on GPU
+ !---
+ !--- couple with outer core at the bottom of the mantle
+ !---
+ if( ACTUALLY_COUPLE_FLUID_CMB ) &
+ call compute_coupling_cmb_fluid_cuda(Mesh_pointer,3)
+ !---
+ !--- couple with outer core at the top of the inner core
+ !---
+ if( ACTUALLY_COUPLE_FLUID_ICB ) &
+ call compute_coupling_icb_fluid_cuda(Mesh_pointer,3)
+
+ endif
+ endif ! iphase == 1
+
+ ! assemble all the contributions between slices using MPI
+
+ ! crust/mantle and inner core handled in the same call
+ ! in order to reduce the number of MPI messages by 2
+
+ if( iphase == 1 ) then
+ ! sends out MPI interface data
+ ! adjoint / kernel runs
+ if(.NOT. GPU_MODE) then
+ ! on CPU
+ ! sends accel values to corresponding MPI interface neighbors
+ ! crust mantle
+ call assemble_MPI_vector_s(NPROCTOT_VAL,NGLOB_CRUST_MANTLE, &
+ b_accel_crust_mantle, &
+ b_buffer_send_vector_cm,b_buffer_recv_vector_cm, &
+ num_interfaces_crust_mantle,max_nibool_interfaces_cm, &
+ nibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle,&
+ my_neighbours_crust_mantle, &
+ b_request_send_vector_cm,b_request_recv_vector_cm)
+ ! inner core
+ call assemble_MPI_vector_s(NPROCTOT_VAL,NGLOB_INNER_CORE, &
+ b_accel_inner_core, &
+ b_buffer_send_vector_inner_core,b_buffer_recv_vector_inner_core, &
+ num_interfaces_inner_core,max_nibool_interfaces_ic, &
+ nibool_interfaces_inner_core,ibool_interfaces_inner_core,&
+ my_neighbours_inner_core, &
+ b_request_send_vector_ic,b_request_recv_vector_ic)
+ else
+ ! on GPU
+ ! crust mantle
+ call assemble_MPI_vector_send_cuda(Mesh_pointer,NPROCTOT_VAL, &
+ b_buffer_send_vector_cm,b_buffer_recv_vector_cm, &
+ num_interfaces_crust_mantle,max_nibool_interfaces_cm, &
+ nibool_interfaces_crust_mantle,&
+ my_neighbours_crust_mantle, &
+ b_request_send_vector_cm,b_request_recv_vector_cm, &
+ IREGION_CRUST_MANTLE, &
+ 3) ! <-- 3 == adjoint b_accel
+ ! inner core
+ call assemble_MPI_vector_send_cuda(Mesh_pointer,NPROCTOT_VAL, &
+ b_buffer_send_vector_inner_core,b_buffer_recv_vector_inner_core, &
+ num_interfaces_inner_core,max_nibool_interfaces_ic, &
+ nibool_interfaces_inner_core,&
+ my_neighbours_inner_core, &
+ b_request_send_vector_ic,b_request_recv_vector_ic, &
+ IREGION_INNER_CORE, &
+ 3)
+ endif ! GPU
+ else
+ ! waits for send/receive requests to be completed and assembles values
+ ! adjoint / kernel runs
+ if(.NOT. GPU_MODE) then
+ ! on CPU
+ ! crust mantle
+ call assemble_MPI_vector_w(NPROCTOT_VAL,NGLOB_CRUST_MANTLE, &
+ b_accel_crust_mantle, &
+ b_buffer_recv_vector_cm,num_interfaces_crust_mantle,&
+ max_nibool_interfaces_cm, &
+ nibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle, &
+ b_request_send_vector_cm,b_request_recv_vector_cm)
+ ! inner core
+ call assemble_MPI_vector_w(NPROCTOT_VAL,NGLOB_INNER_CORE, &
+ b_accel_inner_core, &
+ b_buffer_recv_vector_inner_core,num_interfaces_inner_core,&
+ max_nibool_interfaces_ic, &
+ nibool_interfaces_inner_core,ibool_interfaces_inner_core, &
+ b_request_send_vector_ic,b_request_recv_vector_ic)
+
+ else
+ ! on GPU
+ ! crust mantle
+ call assemble_MPI_vector_write_cuda(Mesh_pointer,NPROCTOT_VAL, &
+ b_buffer_recv_vector_cm, &
+ num_interfaces_crust_mantle,max_nibool_interfaces_cm, &
+ b_request_send_vector_cm,b_request_recv_vector_cm, &
+ IREGION_CRUST_MANTLE, &
+ 3) ! <-- 3 == adjoint b_accel
+ ! inner core
+ call assemble_MPI_vector_write_cuda(Mesh_pointer,NPROCTOT_VAL,&
+ b_buffer_recv_vector_inner_core, &
+ num_interfaces_inner_core,max_nibool_interfaces_ic, &
+ b_request_send_vector_ic,b_request_recv_vector_ic, &
+ IREGION_INNER_CORE, &
+ 3)
+ endif
+ endif ! iphase == 1
+
+ enddo ! iphase
+
+ ! updates (only) acceleration w/ rotation in the crust/mantle region (touches oceans)
+ if(.NOT. GPU_MODE) then
+ ! on CPU
! adjoint / kernel runs
- if (SIMULATION_TYPE == 3) &
- call update_veloc_elastic(NGLOB_CRUST_MANTLE_ADJOINT,b_veloc_crust_mantle,b_accel_crust_mantle, &
- NGLOB_INNER_CORE_ADJOINT,b_veloc_inner_core,b_accel_inner_core, &
- b_deltatover2,b_two_omega_earth,rmass_inner_core)
+ call update_accel_elastic(NGLOB_CRUST_MANTLE_ADJOINT,NGLOB_XY_CM,b_veloc_crust_mantle,b_accel_crust_mantle, &
+ b_two_omega_earth,rmassx_crust_mantle,rmassy_crust_mantle,rmassz_crust_mantle)
else
+ ! on GPU
+ ! includes FORWARD_OR_ADJOINT == 3
+ call update_accel_3_a_cuda(Mesh_pointer,b_deltatover2,NCHUNKS_VAL,3)
+ endif
+
+ ! couples ocean with crust mantle
+ ! (updates acceleration with ocean load approximation)
+ if( OCEANS_VAL ) then
+ if(.NOT. GPU_MODE) then
+ ! on CPU
+ call compute_coupling_ocean(b_accel_crust_mantle, &
+ rmassx_crust_mantle, rmassy_crust_mantle, rmassz_crust_mantle, &
+ rmass_ocean_load,normal_top_crust_mantle, &
+ ibool_crust_mantle,ibelm_top_crust_mantle, &
+ updated_dof_ocean_load,NGLOB_XY_CM, &
+ NSPEC2D_TOP(IREGION_CRUST_MANTLE) )
+ else
+ ! on GPU
+ call compute_coupling_ocean_cuda(Mesh_pointer,NCHUNKS_VAL,EXACT_MASS_MATRIX_FOR_ROTATION,USE_LDDRK, &
+ 3) ! <- 3 == backward/reconstructed arrays
+ endif
+ endif
+
+ ! Newmark time scheme:
+ ! corrector terms for elastic parts
+ ! (updates velocity)
+ if(.NOT. GPU_MODE ) then
+ ! on CPU
+ ! adjoint / kernel runs
+ call update_veloc_elastic(NGLOB_CRUST_MANTLE_ADJOINT,b_veloc_crust_mantle,b_accel_crust_mantle, &
+ NGLOB_INNER_CORE_ADJOINT,b_veloc_inner_core,b_accel_inner_core, &
+ b_deltatover2,b_two_omega_earth,rmass_inner_core)
+ else
! on GPU
- call kernel_3_b_cuda(Mesh_pointer, &
- deltatover2,SIMULATION_TYPE,b_deltatover2)
+ ! includes FORWARD_OR_ADJOINT == 3
+ call update_veloc_3_b_cuda(Mesh_pointer,b_deltatover2,3)
endif
@@ -583,5 +737,5 @@
! endif
! endif
- end subroutine compute_forces_viscoelastic
+ end subroutine compute_forces_viscoelastic_backward
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_kernels.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_kernels.F90 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_kernels.F90 2013-08-30 12:45:44 UTC (rev 22746)
@@ -310,8 +310,15 @@
vector_accel(3) = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
! density kernel
+! rho_kl_outer_core(i,j,k,ispec) = rho_kl_outer_core(i,j,k,ispec) &
+! + deltat * dot_product(vector_accel(:), b_vector_displ_outer_core(:,iglob))
+
+!! DK DK July 2013: replaces dot_product() with an unrolled expression, otherwise most compilers
+!! DK DK July 2013: will try to vectorize this rather than the outer loop, resulting in a much slower code
rho_kl_outer_core(i,j,k,ispec) = rho_kl_outer_core(i,j,k,ispec) &
- + deltat * dot_product(vector_accel(:), b_vector_displ_outer_core(:,iglob))
+ + deltat * ( vector_accel(1) * b_vector_displ_outer_core(1,iglob) &
+ + vector_accel(2) * b_vector_displ_outer_core(2,iglob) &
+ + vector_accel(3) * b_vector_displ_outer_core(3,iglob) )
! bulk modulus kernel
kappal = rhostore_outer_core(i,j,k,ispec)/kappavstore_outer_core(i,j,k,ispec)
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_crust_mantle.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_crust_mantle.f90 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_crust_mantle.f90 2013-08-30 12:45:44 UTC (rev 22746)
@@ -25,18 +25,20 @@
!
!=====================================================================
- subroutine compute_stacey_crust_mantle()
+ subroutine compute_stacey_crust_mantle_forward()
+! stacey conditions for forward or adjoint wavefields (SIMULATION_TYPE == 1 or 2)
+
use constants_solver
use specfem_par,only: &
- ichunk,SIMULATION_TYPE,SAVE_FORWARD,NSTEP,it, &
+ ichunk,SIMULATION_TYPE,SAVE_FORWARD,it, &
wgllwgll_xz,wgllwgll_yz
use specfem_par,only: GPU_MODE,Mesh_pointer
use specfem_par_crustmantle, only: &
- veloc_crust_mantle,accel_crust_mantle,b_accel_crust_mantle, &
+ veloc_crust_mantle,accel_crust_mantle, &
ibool_crust_mantle, &
jacobian2D_xmin_crust_mantle,jacobian2D_xmax_crust_mantle, &
jacobian2D_ymin_crust_mantle,jacobian2D_ymax_crust_mantle, &
@@ -71,6 +73,8 @@
! file access (by process rank modulo 8) showed that the following,
! simple approach is still fastest. (assuming that files are accessed on a local scratch disk)
+ ! checks
+ if( SIMULATION_TYPE == 3 ) return
! crust & mantle
@@ -78,13 +82,6 @@
! if two chunks exclude this face for one of them
if(NCHUNKS_VAL == 1 .or. ichunk == CHUNK_AC) then
- ! reads absorbing boundary values
- if (SIMULATION_TYPE == 3 .and. nspec2D_xmin_crust_mantle > 0) then
- ! note: backward/reconstructed wavefields are read in after the Newmark time scheme in the first time loop
- ! this leads to a corresponding boundary condition at time index NSTEP - (it-1) = NSTEP - it + 1
- call read_abs(0,absorb_xmin_crust_mantle,reclen_xmin_crust_mantle,NSTEP-it+1)
- endif
-
if ( .NOT. GPU_MODE) then
! on CPU
do ispec2D=1,nspec2D_xmin_crust_mantle
@@ -119,9 +116,7 @@
accel_crust_mantle(2,iglob)=accel_crust_mantle(2,iglob) - ty*weight
accel_crust_mantle(3,iglob)=accel_crust_mantle(3,iglob) - tz*weight
- if (SIMULATION_TYPE == 3) then
- b_accel_crust_mantle(:,iglob)=b_accel_crust_mantle(:,iglob) - absorb_xmin_crust_mantle(:,j,k,ispec2D)
- else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
+ if( SAVE_FORWARD ) then
absorb_xmin_crust_mantle(1,j,k,ispec2D) = tx*weight
absorb_xmin_crust_mantle(2,j,k,ispec2D) = ty*weight
absorb_xmin_crust_mantle(3,j,k,ispec2D) = tz*weight
@@ -138,7 +133,7 @@
endif
! writes absorbing boundary values
- if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. nspec2D_xmin_crust_mantle > 0 ) then
+ if( SAVE_FORWARD .and. nspec2D_xmin_crust_mantle > 0 ) then
call write_abs(0,absorb_xmin_crust_mantle, reclen_xmin_crust_mantle,it)
endif
@@ -148,11 +143,6 @@
! if two chunks exclude this face for one of them
if(NCHUNKS_VAL == 1 .or. ichunk == CHUNK_AB) then
- ! reads absorbing boundary values
- if (SIMULATION_TYPE == 3 .and. nspec2D_xmax_crust_mantle > 0) then
- call read_abs(1,absorb_xmax_crust_mantle,reclen_xmax_crust_mantle,NSTEP-it+1)
- endif
-
if(.NOT. GPU_MODE ) then
! on CPU
do ispec2D=1,nspec2D_xmax_crust_mantle
@@ -187,9 +177,7 @@
accel_crust_mantle(2,iglob)=accel_crust_mantle(2,iglob) - ty*weight
accel_crust_mantle(3,iglob)=accel_crust_mantle(3,iglob) - tz*weight
- if (SIMULATION_TYPE == 3) then
- b_accel_crust_mantle(:,iglob)=b_accel_crust_mantle(:,iglob) - absorb_xmax_crust_mantle(:,j,k,ispec2D)
- else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
+ if( SAVE_FORWARD ) then
absorb_xmax_crust_mantle(1,j,k,ispec2D) = tx*weight
absorb_xmax_crust_mantle(2,j,k,ispec2D) = ty*weight
absorb_xmax_crust_mantle(3,j,k,ispec2D) = tz*weight
@@ -208,7 +196,7 @@
! writes absorbing boundary values
- if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. nspec2D_xmax_crust_mantle > 0 ) then
+ if( SAVE_FORWARD .and. nspec2D_xmax_crust_mantle > 0 ) then
call write_abs(1,absorb_xmax_crust_mantle,reclen_xmax_crust_mantle,it)
endif
@@ -216,11 +204,6 @@
! ymin
- ! reads absorbing boundary values
- if (SIMULATION_TYPE == 3 .and. nspec2D_ymin_crust_mantle > 0) then
- call read_abs(2,absorb_ymin_crust_mantle, reclen_ymin_crust_mantle,NSTEP-it+1)
- endif
-
if( .NOT. GPU_MODE ) then
! on CPU
do ispec2D=1,nspec2D_ymin_crust_mantle
@@ -255,9 +238,7 @@
accel_crust_mantle(2,iglob)=accel_crust_mantle(2,iglob) - ty*weight
accel_crust_mantle(3,iglob)=accel_crust_mantle(3,iglob) - tz*weight
- if (SIMULATION_TYPE == 3) then
- b_accel_crust_mantle(:,iglob)=b_accel_crust_mantle(:,iglob) - absorb_ymin_crust_mantle(:,i,k,ispec2D)
- else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
+ if( SAVE_FORWARD ) then
absorb_ymin_crust_mantle(1,i,k,ispec2D) = tx*weight
absorb_ymin_crust_mantle(2,i,k,ispec2D) = ty*weight
absorb_ymin_crust_mantle(3,i,k,ispec2D) = tz*weight
@@ -275,17 +256,12 @@
endif
! writes absorbing boundary values
- if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. nspec2D_ymin_crust_mantle > 0 ) then
+ if( SAVE_FORWARD .and. nspec2D_ymin_crust_mantle > 0 ) then
call write_abs(2,absorb_ymin_crust_mantle,reclen_ymin_crust_mantle,it)
endif
! ymax
- ! reads absorbing boundary values
- if (SIMULATION_TYPE == 3 .and. nspec2D_ymax_crust_mantle > 0) then
- call read_abs(3,absorb_ymax_crust_mantle,reclen_ymax_crust_mantle,NSTEP-it+1)
- endif
-
if( .NOT. GPU_MODE ) then
! on CPU
do ispec2D=1,nspec2D_ymax_crust_mantle
@@ -320,9 +296,7 @@
accel_crust_mantle(2,iglob)=accel_crust_mantle(2,iglob) - ty*weight
accel_crust_mantle(3,iglob)=accel_crust_mantle(3,iglob) - tz*weight
- if (SIMULATION_TYPE == 3) then
- b_accel_crust_mantle(:,iglob)=b_accel_crust_mantle(:,iglob) - absorb_ymax_crust_mantle(:,i,k,ispec2D)
- else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
+ if( SAVE_FORWARD ) then
absorb_ymax_crust_mantle(1,i,k,ispec2D) = tx*weight
absorb_ymax_crust_mantle(2,i,k,ispec2D) = ty*weight
absorb_ymax_crust_mantle(3,i,k,ispec2D) = tz*weight
@@ -340,9 +314,202 @@
endif
! writes absorbing boundary values
- if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. nspec2D_ymax_crust_mantle > 0 ) then
+ if( SAVE_FORWARD .and. nspec2D_ymax_crust_mantle > 0 ) then
call write_abs(3,absorb_ymax_crust_mantle,reclen_ymax_crust_mantle,it)
endif
- end subroutine compute_stacey_crust_mantle
+ end subroutine compute_stacey_crust_mantle_forward
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine compute_stacey_crust_mantle_backward()
+
+! stacey for backward/reconstructed wavefield
+
+! applies previously saved Stacey absorbing-boundary contributions to the
+! backward/reconstructed acceleration field b_accel_crust_mantle:
+! reads, in reverse time order (record NSTEP-it+1), the boundary values that
+! compute_stacey_crust_mantle_forward() wrote with write_abs(), and subtracts
+! them on each absorbing face (xmin/xmax/ymin/ymax)
+
+ use constants_solver
+
+ use specfem_par,only: &
+ ichunk,SIMULATION_TYPE,NSTEP,it
+
+ use specfem_par,only: GPU_MODE,Mesh_pointer
+
+ use specfem_par_crustmantle, only: &
+ b_accel_crust_mantle, &
+ ibool_crust_mantle, &
+ ibelm_xmin_crust_mantle,ibelm_xmax_crust_mantle, &
+ ibelm_ymin_crust_mantle,ibelm_ymax_crust_mantle, &
+ nimin_crust_mantle,nimax_crust_mantle, &
+ njmin_crust_mantle,njmax_crust_mantle, &
+ nkmin_xi_crust_mantle,nkmin_eta_crust_mantle, &
+ nspec2D_xmin_crust_mantle,nspec2D_xmax_crust_mantle, &
+ nspec2D_ymin_crust_mantle,nspec2D_ymax_crust_mantle, &
+ reclen_xmin_crust_mantle,reclen_xmax_crust_mantle, &
+ reclen_ymin_crust_mantle,reclen_ymax_crust_mantle, &
+ absorb_xmin_crust_mantle,absorb_xmax_crust_mantle, &
+ absorb_ymin_crust_mantle,absorb_ymax_crust_mantle
+
+ implicit none
+
+ ! local parameters
+ integer :: i,j,k,ispec,iglob,ispec2D
+
+ ! note: we use C functions for I/O as they still have a better performance than
+ ! Fortran, unformatted file I/O. however, using -assume byterecl together with Fortran functions
+ ! comes very close (only ~ 4 % slower ).
+ !
+ ! tests with intermediate storages (every 8 step) and/or asynchronious
+ ! file access (by process rank modulo 8) showed that the following,
+ ! simple approach is still fastest. (assuming that files are accessed on a local scratch disk)
+
+ ! checks
+ ! only relevant for kernel/backward simulations (SIMULATION_TYPE == 3)
+ if (SIMULATION_TYPE /= 3 ) return
+
+ ! crust & mantle
+
+ ! xmin
+ ! if two chunks exclude this face for one of them
+ if(NCHUNKS_VAL == 1 .or. ichunk == CHUNK_AC) then
+
+ ! reads absorbing boundary values
+ if( nspec2D_xmin_crust_mantle > 0 ) then
+ ! note: backward/reconstructed wavefields are read in after the Newmark time scheme in the first time loop
+ ! this leads to a corresponding boundary condition at time index NSTEP - (it-1) = NSTEP - it + 1
+ call read_abs(0,absorb_xmin_crust_mantle,reclen_xmin_crust_mantle,NSTEP-it+1)
+ endif
+
+ if ( .NOT. GPU_MODE) then
+ ! on CPU
+ do ispec2D=1,nspec2D_xmin_crust_mantle
+
+ ispec=ibelm_xmin_crust_mantle(ispec2D)
+
+ ! exclude elements that are not on absorbing edges
+ if(nkmin_xi_crust_mantle(1,ispec2D) == 0 .or. njmin_crust_mantle(1,ispec2D) == 0) cycle
+
+ i=1
+ do k=nkmin_xi_crust_mantle(1,ispec2D),NGLLZ
+ do j=njmin_crust_mantle(1,ispec2D),njmax_crust_mantle(1,ispec2D)
+ iglob=ibool_crust_mantle(i,j,k,ispec)
+
+ b_accel_crust_mantle(:,iglob)=b_accel_crust_mantle(:,iglob) - absorb_xmin_crust_mantle(:,j,k,ispec2D)
+ enddo
+ enddo
+ enddo
+
+ else
+ ! on GPU
+ if( nspec2D_xmin_crust_mantle > 0 ) call compute_stacey_elastic_backward_cuda(Mesh_pointer, &
+ absorb_xmin_crust_mantle, &
+ 0) ! <= xmin
+ endif
+
+ endif ! NCHUNKS_VAL == 1 .or. ichunk == CHUNK_AC
+
+ ! xmax
+ ! if two chunks exclude this face for one of them
+ if(NCHUNKS_VAL == 1 .or. ichunk == CHUNK_AB) then
+
+ ! reads absorbing boundary values
+ if( nspec2D_xmax_crust_mantle > 0 ) then
+ call read_abs(1,absorb_xmax_crust_mantle,reclen_xmax_crust_mantle,NSTEP-it+1)
+ endif
+
+ if(.NOT. GPU_MODE ) then
+ ! on CPU
+ do ispec2D=1,nspec2D_xmax_crust_mantle
+
+ ispec=ibelm_xmax_crust_mantle(ispec2D)
+
+ ! exclude elements that are not on absorbing edges
+ if(nkmin_xi_crust_mantle(2,ispec2D) == 0 .or. njmin_crust_mantle(2,ispec2D) == 0) cycle
+
+ i=NGLLX
+ do k=nkmin_xi_crust_mantle(2,ispec2D),NGLLZ
+ do j=njmin_crust_mantle(2,ispec2D),njmax_crust_mantle(2,ispec2D)
+ iglob=ibool_crust_mantle(i,j,k,ispec)
+
+ b_accel_crust_mantle(:,iglob)=b_accel_crust_mantle(:,iglob) - absorb_xmax_crust_mantle(:,j,k,ispec2D)
+ enddo
+ enddo
+ enddo
+
+ else
+ ! on GPU
+ if( nspec2D_xmax_crust_mantle > 0 ) call compute_stacey_elastic_backward_cuda(Mesh_pointer, &
+ absorb_xmax_crust_mantle, &
+ 1) ! <= xmax
+ endif
+
+ endif ! NCHUNKS_VAL == 1 .or. ichunk == CHUNK_AB
+
+ ! ymin
+
+ ! reads absorbing boundary values
+ if( nspec2D_ymin_crust_mantle > 0 ) then
+ call read_abs(2,absorb_ymin_crust_mantle, reclen_ymin_crust_mantle,NSTEP-it+1)
+ endif
+
+ if( .NOT. GPU_MODE ) then
+ ! on CPU
+ do ispec2D=1,nspec2D_ymin_crust_mantle
+
+ ispec=ibelm_ymin_crust_mantle(ispec2D)
+
+ ! exclude elements that are not on absorbing edges
+ if(nkmin_eta_crust_mantle(1,ispec2D) == 0 .or. nimin_crust_mantle(1,ispec2D) == 0) cycle
+
+ j=1
+ do k=nkmin_eta_crust_mantle(1,ispec2D),NGLLZ
+ do i=nimin_crust_mantle(1,ispec2D),nimax_crust_mantle(1,ispec2D)
+ iglob=ibool_crust_mantle(i,j,k,ispec)
+
+ b_accel_crust_mantle(:,iglob)=b_accel_crust_mantle(:,iglob) - absorb_ymin_crust_mantle(:,i,k,ispec2D)
+ enddo
+ enddo
+ enddo
+
+ else
+ ! on GPU
+ if( nspec2D_ymin_crust_mantle > 0 ) call compute_stacey_elastic_backward_cuda(Mesh_pointer, &
+ absorb_ymin_crust_mantle, &
+ 2) ! <= ymin
+ endif
+
+ ! ymax
+
+ ! reads absorbing boundary values
+ if( nspec2D_ymax_crust_mantle > 0 ) then
+ call read_abs(3,absorb_ymax_crust_mantle,reclen_ymax_crust_mantle,NSTEP-it+1)
+ endif
+
+ if( .NOT. GPU_MODE ) then
+ ! on CPU
+ do ispec2D=1,nspec2D_ymax_crust_mantle
+
+ ispec=ibelm_ymax_crust_mantle(ispec2D)
+
+ ! exclude elements that are not on absorbing edges
+ if(nkmin_eta_crust_mantle(2,ispec2D) == 0 .or. nimin_crust_mantle(2,ispec2D) == 0) cycle
+
+ j=NGLLY
+ do k=nkmin_eta_crust_mantle(2,ispec2D),NGLLZ
+ do i=nimin_crust_mantle(2,ispec2D),nimax_crust_mantle(2,ispec2D)
+ iglob=ibool_crust_mantle(i,j,k,ispec)
+
+ b_accel_crust_mantle(:,iglob)=b_accel_crust_mantle(:,iglob) - absorb_ymax_crust_mantle(:,i,k,ispec2D)
+ enddo
+ enddo
+ enddo
+
+ else
+ ! on GPU
+ if( nspec2D_ymax_crust_mantle > 0 ) call compute_stacey_elastic_backward_cuda(Mesh_pointer, &
+ absorb_ymax_crust_mantle, &
+ 3) ! <= ymax
+ endif
+
+ end subroutine compute_stacey_crust_mantle_backward
+
+
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_outer_core.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_outer_core.f90 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_outer_core.f90 2013-08-30 12:45:44 UTC (rev 22746)
@@ -25,18 +25,18 @@
!
!=====================================================================
- subroutine compute_stacey_outer_core()
+ subroutine compute_stacey_outer_core_forward()
use constants_solver
use specfem_par,only: &
- ichunk,SIMULATION_TYPE,SAVE_FORWARD,NSTEP,it, &
+ ichunk,SIMULATION_TYPE,SAVE_FORWARD,it, &
wgllwgll_xz,wgllwgll_yz,wgllwgll_xy
use specfem_par,only: GPU_MODE,Mesh_pointer
use specfem_par_outercore,only: &
- veloc_outer_core,accel_outer_core,b_accel_outer_core, &
+ veloc_outer_core,accel_outer_core, &
ibool_outer_core, &
jacobian2D_xmin_outer_core,jacobian2D_xmax_outer_core, &
jacobian2D_ymin_outer_core,jacobian2D_ymax_outer_core, &
@@ -69,18 +69,15 @@
! file access (by process rank modulo 8) showed that the following,
! simple approach is still fastest. (assuming that files are accessed on a local scratch disk)
+ ! checks
+ if( SIMULATION_TYPE == 3 ) return
+
! outer core
+
! xmin
! if two chunks exclude this face for one of them
if(NCHUNKS_VAL == 1 .or. ichunk == CHUNK_AC) then
- ! reads absorbing boundary values
- if (SIMULATION_TYPE == 3 .and. nspec2D_xmin_outer_core > 0) then
- ! note: backward/reconstructed wavefields are read in after the Newmark time scheme in the first time loop
- ! this leads to a corresponding boundary condition at time index NSTEP - (it-1) = NSTEP - it + 1
- call read_abs(4,absorb_xmin_outer_core,reclen_xmin_outer_core,NSTEP-it+1)
- endif
-
if( .NOT. GPU_MODE) then
! on CPU
do ispec2D=1,nspec2D_xmin_outer_core
@@ -101,9 +98,7 @@
accel_outer_core(iglob) = accel_outer_core(iglob) - weight*sn
- if (SIMULATION_TYPE == 3) then
- b_accel_outer_core(iglob) = b_accel_outer_core(iglob) - absorb_xmin_outer_core(j,k,ispec2D)
- else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
+ if( SAVE_FORWARD ) then
absorb_xmin_outer_core(j,k,ispec2D) = weight*sn
endif
enddo
@@ -118,7 +113,7 @@
endif
! writes absorbing boundary values
- if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. nspec2D_xmin_outer_core > 0 ) then
+ if( SAVE_FORWARD .and. nspec2D_xmin_outer_core > 0 ) then
call write_abs(4,absorb_xmin_outer_core,reclen_xmin_outer_core,it)
endif
@@ -128,10 +123,6 @@
! if two chunks exclude this face for one of them
if(NCHUNKS_VAL == 1 .or. ichunk == CHUNK_AB) then
- if (SIMULATION_TYPE == 3 .and. nspec2D_xmax_outer_core > 0) then
- call read_abs(5,absorb_xmax_outer_core,reclen_xmax_outer_core,NSTEP-it+1)
- endif
-
if( .NOT. GPU_MODE ) then
! on CPU
do ispec2D=1,nspec2D_xmax_outer_core
@@ -152,9 +143,7 @@
accel_outer_core(iglob) = accel_outer_core(iglob) - weight*sn
- if (SIMULATION_TYPE == 3) then
- b_accel_outer_core(iglob) = b_accel_outer_core(iglob) - absorb_xmax_outer_core(j,k,ispec2D)
- else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
+ if( SAVE_FORWARD ) then
absorb_xmax_outer_core(j,k,ispec2D) = weight*sn
endif
@@ -169,16 +158,13 @@
5) ! <= xmax
endif
- if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. nspec2D_xmax_outer_core > 0 ) then
+ if( SAVE_FORWARD .and. nspec2D_xmax_outer_core > 0 ) then
call write_abs(5,absorb_xmax_outer_core,reclen_xmax_outer_core,it)
endif
endif
! ymin
- if (SIMULATION_TYPE == 3 .and. nspec2D_ymin_outer_core > 0) then
- call read_abs(6,absorb_ymin_outer_core,reclen_ymin_outer_core,NSTEP-it+1)
- endif
if( .NOT. GPU_MODE ) then
! on CPU
@@ -200,9 +186,7 @@
accel_outer_core(iglob) = accel_outer_core(iglob) - weight*sn
- if (SIMULATION_TYPE == 3) then
- b_accel_outer_core(iglob) = b_accel_outer_core(iglob) - absorb_ymin_outer_core(i,k,ispec2D)
- else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
+ if( SAVE_FORWARD ) then
absorb_ymin_outer_core(i,k,ispec2D) = weight*sn
endif
@@ -217,16 +201,12 @@
6) ! <= ymin
endif
- if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. nspec2D_ymin_outer_core > 0 ) then
+ if( SAVE_FORWARD .and. nspec2D_ymin_outer_core > 0 ) then
call write_abs(6,absorb_ymin_outer_core,reclen_ymin_outer_core,it)
endif
! ymax
- if (SIMULATION_TYPE == 3 .and. nspec2D_ymax_outer_core > 0) then
- call read_abs(7,absorb_ymax_outer_core,reclen_ymax_outer_core,NSTEP-it+1)
- endif
-
if( .NOT. GPU_MODE ) then
! on CPU
do ispec2D=1,nspec2D_ymax_outer_core
@@ -247,9 +227,7 @@
accel_outer_core(iglob) = accel_outer_core(iglob) - weight*sn
- if (SIMULATION_TYPE == 3) then
- b_accel_outer_core(iglob) = b_accel_outer_core(iglob) - absorb_ymax_outer_core(i,k,ispec2D)
- else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
+ if( SAVE_FORWARD ) then
absorb_ymax_outer_core(i,k,ispec2D) = weight*sn
endif
@@ -264,16 +242,13 @@
7) ! <= ymax
endif
- if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. nspec2D_ymax_outer_core > 0 ) then
+ if( SAVE_FORWARD .and. nspec2D_ymax_outer_core > 0 ) then
call write_abs(7,absorb_ymax_outer_core,reclen_ymax_outer_core,it)
endif
! zmin
! for surface elements exactly on the ICB
- if (SIMULATION_TYPE == 3 .and. nspec2D_zmin_outer_core > 0) then
- call read_abs(8,absorb_zmin_outer_core,reclen_zmin,NSTEP-it+1)
- endif
if( .NOT. GPU_MODE ) then
! on CPU
@@ -292,9 +267,7 @@
accel_outer_core(iglob) = accel_outer_core(iglob) - weight*sn
- if (SIMULATION_TYPE == 3) then
- b_accel_outer_core(iglob) = b_accel_outer_core(iglob) - absorb_zmin_outer_core(i,j,ispec2D)
- else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
+ if( SAVE_FORWARD ) then
absorb_zmin_outer_core(i,j,ispec2D) = weight*sn
endif
@@ -309,8 +282,228 @@
8) ! <= zmin
endif
- if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. nspec2D_zmin_outer_core > 0 ) then
+ if( SAVE_FORWARD .and. nspec2D_zmin_outer_core > 0 ) then
call write_abs(8,absorb_zmin_outer_core,reclen_zmin,it)
endif
- end subroutine compute_stacey_outer_core
+ end subroutine compute_stacey_outer_core_forward
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+ subroutine compute_stacey_outer_core_backward()
+
+ use constants_solver
+
+ use specfem_par,only: &
+ ichunk,SIMULATION_TYPE,NSTEP,it
+
+ use specfem_par,only: GPU_MODE,Mesh_pointer
+
+ use specfem_par_outercore,only: &
+ b_accel_outer_core, &
+ ibool_outer_core, &
+ nspec2D_xmin_outer_core,nspec2D_xmax_outer_core, &
+ nspec2D_ymin_outer_core,nspec2D_ymax_outer_core,nspec2D_zmin_outer_core, &
+ nimin_outer_core,nimax_outer_core,nkmin_eta_outer_core, &
+ njmin_outer_core,njmax_outer_core,nkmin_xi_outer_core, &
+ absorb_xmin_outer_core,absorb_xmax_outer_core, &
+ absorb_ymin_outer_core,absorb_ymax_outer_core, &
+ absorb_zmin_outer_core, &
+ reclen_xmin_outer_core,reclen_xmax_outer_core, &
+ reclen_ymin_outer_core,reclen_ymax_outer_core, &
+ reclen_zmin, &
+ ibelm_xmin_outer_core,ibelm_xmax_outer_core, &
+ ibelm_ymin_outer_core,ibelm_ymax_outer_core, &
+ ibelm_bottom_outer_core
+ implicit none
+
+ ! local parameters
+ integer :: i,j,k,ispec2D,ispec,iglob
+
+ ! note: we use C functions for I/O as they still have a better performance than
+ ! Fortran unformatted file I/O. However, using -assume byterecl together with Fortran functions
+ ! comes very close (only ~ 4 % slower ).
+ !
+ ! tests with intermediate storages (every 8th step) and/or asynchronous
+ ! file access (by process rank modulo 8) showed that the following,
+ ! simple approach is still fastest. (assuming that files are accessed on a local scratch disk)
+
+ ! checks
+ if (SIMULATION_TYPE /= 3 ) return
+
+ ! outer core
+
+ ! xmin
+ ! if two chunks exclude this face for one of them
+ if(NCHUNKS_VAL == 1 .or. ichunk == CHUNK_AC) then
+
+ ! reads absorbing boundary values
+ if( nspec2D_xmin_outer_core > 0 ) then
+ ! note: backward/reconstructed wavefields are read in after the Newmark time scheme in the first time loop
+ ! this leads to a corresponding boundary condition at time index NSTEP - (it-1) = NSTEP - it + 1
+ call read_abs(4,absorb_xmin_outer_core,reclen_xmin_outer_core,NSTEP-it+1)
+ endif
+
+ if( .NOT. GPU_MODE) then
+ ! on CPU
+ do ispec2D=1,nspec2D_xmin_outer_core
+
+ ispec=ibelm_xmin_outer_core(ispec2D)
+
+ ! exclude elements that are not on absorbing edges
+ if(nkmin_xi_outer_core(1,ispec2D) == 0 .or. njmin_outer_core(1,ispec2D) == 0) cycle
+
+ i=1
+ do k=nkmin_xi_outer_core(1,ispec2D),NGLLZ
+ do j=njmin_outer_core(1,ispec2D),njmax_outer_core(1,ispec2D)
+ iglob=ibool_outer_core(i,j,k,ispec)
+
+ b_accel_outer_core(iglob) = b_accel_outer_core(iglob) - absorb_xmin_outer_core(j,k,ispec2D)
+ enddo
+ enddo
+ enddo
+
+ else
+ ! on GPU
+ if( nspec2D_xmin_outer_core > 0 ) call compute_stacey_acoustic_backward_cuda(Mesh_pointer, &
+ absorb_xmin_outer_core, &
+ 4) ! <= xmin
+ endif
+
+ endif
+
+ ! xmax
+ ! if two chunks exclude this face for one of them
+ if(NCHUNKS_VAL == 1 .or. ichunk == CHUNK_AB) then
+
+ if( nspec2D_xmax_outer_core > 0 ) then
+ call read_abs(5,absorb_xmax_outer_core,reclen_xmax_outer_core,NSTEP-it+1)
+ endif
+
+ if( .NOT. GPU_MODE ) then
+ ! on CPU
+ do ispec2D=1,nspec2D_xmax_outer_core
+
+ ispec=ibelm_xmax_outer_core(ispec2D)
+
+ ! exclude elements that are not on absorbing edges
+ if(nkmin_xi_outer_core(2,ispec2D) == 0 .or. njmin_outer_core(2,ispec2D) == 0) cycle
+
+ i=NGLLX
+ do k=nkmin_xi_outer_core(2,ispec2D),NGLLZ
+ do j=njmin_outer_core(2,ispec2D),njmax_outer_core(2,ispec2D)
+ iglob=ibool_outer_core(i,j,k,ispec)
+
+ b_accel_outer_core(iglob) = b_accel_outer_core(iglob) - absorb_xmax_outer_core(j,k,ispec2D)
+ enddo
+ enddo
+ enddo
+
+ else
+ ! on GPU
+ if( nspec2D_xmax_outer_core > 0 ) call compute_stacey_acoustic_backward_cuda(Mesh_pointer, &
+ absorb_xmax_outer_core, &
+ 5) ! <= xmax
+ endif
+
+ endif
+
+ ! ymin
+ if( nspec2D_ymin_outer_core > 0 ) then
+ call read_abs(6,absorb_ymin_outer_core,reclen_ymin_outer_core,NSTEP-it+1)
+ endif
+
+ if( .NOT. GPU_MODE ) then
+ ! on CPU
+ do ispec2D=1,nspec2D_ymin_outer_core
+
+ ispec=ibelm_ymin_outer_core(ispec2D)
+
+ ! exclude elements that are not on absorbing edges
+ if(nkmin_eta_outer_core(1,ispec2D) == 0 .or. nimin_outer_core(1,ispec2D) == 0) cycle
+
+ j=1
+ do k=nkmin_eta_outer_core(1,ispec2D),NGLLZ
+ do i=nimin_outer_core(1,ispec2D),nimax_outer_core(1,ispec2D)
+ iglob=ibool_outer_core(i,j,k,ispec)
+
+ b_accel_outer_core(iglob) = b_accel_outer_core(iglob) - absorb_ymin_outer_core(i,k,ispec2D)
+ enddo
+ enddo
+ enddo
+
+ else
+ ! on GPU
+ if( nspec2D_ymin_outer_core > 0 ) call compute_stacey_acoustic_backward_cuda(Mesh_pointer, &
+ absorb_ymin_outer_core, &
+ 6) ! <= ymin
+ endif
+
+ ! ymax
+
+ if( nspec2D_ymax_outer_core > 0 ) then
+ call read_abs(7,absorb_ymax_outer_core,reclen_ymax_outer_core,NSTEP-it+1)
+ endif
+
+ if( .NOT. GPU_MODE ) then
+ ! on CPU
+ do ispec2D=1,nspec2D_ymax_outer_core
+
+ ispec=ibelm_ymax_outer_core(ispec2D)
+
+ ! exclude elements that are not on absorbing edges
+ if(nkmin_eta_outer_core(2,ispec2D) == 0 .or. nimin_outer_core(2,ispec2D) == 0) cycle
+
+ j=NGLLY
+ do k=nkmin_eta_outer_core(2,ispec2D),NGLLZ
+ do i=nimin_outer_core(2,ispec2D),nimax_outer_core(2,ispec2D)
+ iglob=ibool_outer_core(i,j,k,ispec)
+
+ b_accel_outer_core(iglob) = b_accel_outer_core(iglob) - absorb_ymax_outer_core(i,k,ispec2D)
+ enddo
+ enddo
+ enddo
+
+ else
+ ! on GPU
+ if( nspec2D_ymax_outer_core > 0 ) call compute_stacey_acoustic_backward_cuda(Mesh_pointer, &
+ absorb_ymax_outer_core, &
+ 7) ! <= ymax
+ endif
+
+ ! zmin
+
+ ! for surface elements exactly on the ICB
+ if( nspec2D_zmin_outer_core > 0 ) then
+ call read_abs(8,absorb_zmin_outer_core,reclen_zmin,NSTEP-it+1)
+ endif
+
+ if( .NOT. GPU_MODE ) then
+ ! on CPU
+ do ispec2D = 1,nspec2D_zmin_outer_core
+
+ ispec = ibelm_bottom_outer_core(ispec2D)
+
+ k = 1
+ do j = 1,NGLLY
+ do i = 1,NGLLX
+ iglob = ibool_outer_core(i,j,k,ispec)
+
+ b_accel_outer_core(iglob) = b_accel_outer_core(iglob) - absorb_zmin_outer_core(i,j,ispec2D)
+ enddo
+ enddo
+ enddo
+
+ else
+ ! on GPU
+ if( nspec2D_zmin_outer_core > 0 ) call compute_stacey_acoustic_backward_cuda(Mesh_pointer, &
+ absorb_zmin_outer_core, &
+ 8) ! <= zmin
+ endif
+
+ end subroutine compute_stacey_outer_core_backward
+
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/finalize_simulation.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/finalize_simulation.f90 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/finalize_simulation.f90 2013-08-30 12:45:44 UTC (rev 22746)
@@ -109,7 +109,11 @@
! dump kernel arrays
if (SIMULATION_TYPE == 3) then
! crust mantle
- call save_kernels_crust_mantle()
+ if (SAVE_REGULAR_KL) then
+ call save_regular_kernels_crust_mantle()
+ else
+ call save_kernels_crust_mantle()
+ endif
! noise strength kernel
if (NOISE_TOMOGRAPHY == 3) then
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/iterate_time.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/iterate_time.F90 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/iterate_time.F90 2013-08-30 12:45:44 UTC (rev 22746)
@@ -86,12 +86,30 @@
! elastic solver for crust/mantle and inner core
call compute_forces_viscoelastic()
- ! restores last time snapshot saved for backward/reconstruction of wavefields
- ! note: this is done here after the Newmark time scheme, otherwise the indexing for sources
- ! and adjoint sources will become more complicated
- ! that is, index it for adjoint sources will match index NSTEP - 1 for backward/reconstructed wavefields
- if( SIMULATION_TYPE == 3 .and. it == 1 ) then
- call read_forward_arrays()
+ ! kernel simulations (forward and adjoint wavefields)
+ if( SIMULATION_TYPE == 3 ) then
+ ! reconstructs forward wavefields based on the last stored wavefield data
+
+ ! update displacement using Newmark time scheme
+ call update_displacement_Newmark_backward()
+
+ ! acoustic solver for outer core
+ ! (needs to be done first, before elastic one)
+ call compute_forces_acoustic_backward()
+
+ ! elastic solver for crust/mantle and inner core
+ call compute_forces_viscoelastic_backward()
+
+ ! restores last time snapshot saved for backward/reconstruction of wavefields
+ ! note: this is done here after the Newmark time scheme, otherwise the indexing for sources
+ ! and adjoint sources will become more complicated
+ ! that is, index it for adjoint sources will match index NSTEP - 1 for backward/reconstructed wavefields
+ if( it == 1 ) then
+ call read_forward_arrays()
+ endif
+
+ ! adjoint simulations: kernels
+ call compute_kernels()
endif
! write the seismograms with time shift
@@ -99,11 +117,6 @@
call write_seismograms()
endif
- ! adjoint simulations: kernels
- if( SIMULATION_TYPE == 3 ) then
- call compute_kernels()
- endif
-
! outputs movie files
call write_movie_output()
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_regular_points.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_regular_points.f90 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_regular_points.f90 2013-08-30 12:45:44 UTC (rev 22746)
@@ -25,7 +25,7 @@
!
!=====================================================================
-subroutine read_kl_regular_grid(myrank, GRID)
+ subroutine read_kl_regular_grid(myrank, GRID)
use constants
@@ -82,11 +82,11 @@
call exit_MPI(myrank, 'No Model points read in')
endif
-end subroutine read_kl_regular_grid
+ end subroutine read_kl_regular_grid
!==============================================================
-subroutine find_regular_grid_slice_number(slice_number, GRID, &
+ subroutine find_regular_grid_slice_number(slice_number, GRID, &
NCHUNKS, NPROC_XI, NPROC_ETA)
use constants
@@ -150,15 +150,15 @@
slice_number(isp) = nproc * nproc * (chunk_isp-1) + nproc * iproc_eta + iproc_xi
enddo
-end subroutine find_regular_grid_slice_number
+ end subroutine find_regular_grid_slice_number
!==============================================================
! how about using single precision for the iterations?
-subroutine locate_regular_points(npoints_slice,points_slice,GRID, &
- NEX_XI,nspec,xstore,ystore,zstore,ibool, &
- xigll,yigll,zigll,ispec_reg, &
- hxir_reg,hetar_reg,hgammar_reg)
+ subroutine locate_regular_points(npoints_slice,points_slice,GRID, &
+ NEX_XI,nspec,xstore,ystore,zstore,ibool, &
+ xigll,yigll,zigll,ispec_reg, &
+ hxir_reg,hetar_reg,hgammar_reg)
use constants_solver
@@ -357,11 +357,11 @@
! DEBUG
! print *, 'Maximum distance discrepancy ', maxval(dist_final(1:npoints_slice))
-end subroutine locate_regular_points
+ end subroutine locate_regular_points
!==============================================================
-subroutine hex_nodes2(iaddx,iaddy,iaddz)
+ subroutine hex_nodes2(iaddx,iaddy,iaddz)
use constants
@@ -408,11 +408,11 @@
enddo
-end subroutine hex_nodes2
+ end subroutine hex_nodes2
!==============================================================
-subroutine lagrange_any2(xi,NGLL,xigll,h)
+ subroutine lagrange_any2(xi,NGLL,xigll,h)
! subroutine to compute the Lagrange interpolants based upon the GLL points
! and their first derivatives at any point xi in [-1,1]
@@ -441,11 +441,11 @@
h(dgr) = prod1 / prod2
enddo
-end subroutine lagrange_any2
+ end subroutine lagrange_any2
!==============================================================
-subroutine chunk_map(k,xx,yy,zz,xi,eta)
+ subroutine chunk_map(k,xx,yy,zz,xi,eta)
! this program get the xi,eta for (xx,yy,zz)
! point under the k'th chunk coordinate
@@ -492,5 +492,5 @@
stop 'chunk number k < 6'
endif
-end subroutine chunk_map
+ end subroutine chunk_map
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_sources.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_sources.f90 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_sources.f90 2013-08-30 12:45:44 UTC (rev 22746)
@@ -196,23 +196,41 @@
! allocate arrays specific to each subset
allocate(final_distance_source_subset(NSOURCES_SUBSET_current_size), &
- ispec_selected_source_subset(NSOURCES_SUBSET_current_size), &
- ispec_selected_source_all(NSOURCES_SUBSET_current_size,0:NPROCTOT_VAL-1), &
- xi_source_all(NSOURCES_SUBSET_current_size,0:NPROCTOT_VAL-1), &
- eta_source_all(NSOURCES_SUBSET_current_size,0:NPROCTOT_VAL-1), &
- gamma_source_all(NSOURCES_SUBSET_current_size,0:NPROCTOT_VAL-1), &
- final_distance_source_all(NSOURCES_SUBSET_current_size,0:NPROCTOT_VAL-1), &
- x_found_source_all(NSOURCES_SUBSET_current_size,0:NPROCTOT_VAL-1), &
- y_found_source_all(NSOURCES_SUBSET_current_size,0:NPROCTOT_VAL-1), &
- z_found_source_all(NSOURCES_SUBSET_current_size,0:NPROCTOT_VAL-1), &
- xi_source_subset(NSOURCES_SUBSET_current_size), &
- eta_source_subset(NSOURCES_SUBSET_current_size), &
- gamma_source_subset(NSOURCES_SUBSET_current_size), &
- x_found_source(NSOURCES_SUBSET_current_size), &
- y_found_source(NSOURCES_SUBSET_current_size), &
- z_found_source(NSOURCES_SUBSET_current_size),stat=ier)
+ ispec_selected_source_subset(NSOURCES_SUBSET_current_size), &
+ xi_source_subset(NSOURCES_SUBSET_current_size), &
+ eta_source_subset(NSOURCES_SUBSET_current_size), &
+ gamma_source_subset(NSOURCES_SUBSET_current_size), &
+ x_found_source(NSOURCES_SUBSET_current_size), &
+ y_found_source(NSOURCES_SUBSET_current_size), &
+ z_found_source(NSOURCES_SUBSET_current_size),stat=ier)
if( ier /= 0 ) call exit_MPI(myrank,'error allocating temporary source arrays')
+ ! arrays to collect data
+ if( myrank == 0 ) then
+ allocate(ispec_selected_source_all(NSOURCES_SUBSET_current_size,0:NPROCTOT_VAL-1), &
+ xi_source_all(NSOURCES_SUBSET_current_size,0:NPROCTOT_VAL-1), &
+ eta_source_all(NSOURCES_SUBSET_current_size,0:NPROCTOT_VAL-1), &
+ gamma_source_all(NSOURCES_SUBSET_current_size,0:NPROCTOT_VAL-1), &
+ final_distance_source_all(NSOURCES_SUBSET_current_size,0:NPROCTOT_VAL-1), &
+ x_found_source_all(NSOURCES_SUBSET_current_size,0:NPROCTOT_VAL-1), &
+ y_found_source_all(NSOURCES_SUBSET_current_size,0:NPROCTOT_VAL-1), &
+ z_found_source_all(NSOURCES_SUBSET_current_size,0:NPROCTOT_VAL-1),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating temporary source arrays for gather')
+ else
+ ! dummy arrays
+ allocate(ispec_selected_source_all(1,1), &
+ xi_source_all(1,1), &
+ eta_source_all(1,1), &
+ gamma_source_all(1,1), &
+ final_distance_source_all(1,1), &
+ x_found_source_all(1,1), &
+ y_found_source_all(1,1), &
+ z_found_source_all(1,1),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating temporary source dummy arrays for gather')
+ endif
+ ! use -1 as a flag to detect if gather fails for some reason
+ ispec_selected_source_all(:,:) = -1
+
! make sure we clean the subset array before the gather
ispec_selected_source_subset(:) = 0
final_distance_source_subset(:) = HUGEVAL
@@ -549,18 +567,22 @@
! end of loop on all the sources
enddo
+ ! synchronizes processes
+ call sync_all()
! now gather information from all the nodes
- ! use -1 as a flag to detect if gather fails for some reason
- ispec_selected_source_all(:,:) = -1
-
call gather_all_i(ispec_selected_source_subset,NSOURCES_SUBSET_current_size, &
ispec_selected_source_all,NSOURCES_SUBSET_current_size,NPROCTOT_VAL)
+ ! daniel debug
+ !print*,'rank',myrank,'ispec:',ispec_selected_source_subset(:),'all:',ispec_selected_source_all(:,:)
+
! checks that the gather operation went well
if(myrank == 0) then
- if(minval(ispec_selected_source_all(:,:)) <= 0) &
+ if(minval(ispec_selected_source_all(:,:)) <= 0) then
+ print*,'error ispec all:',ispec_selected_source_all(:,:)
call exit_MPI(myrank,'gather operation failed for source')
+ endif
endif
call gather_all_dp(xi_source_subset,NSOURCES_SUBSET_current_size, &
@@ -714,11 +736,11 @@
! deallocate arrays specific to each subset
deallocate(final_distance_source_subset)
deallocate(ispec_selected_source_subset)
+ deallocate(xi_source_subset,eta_source_subset,gamma_source_subset)
+ deallocate(x_found_source,y_found_source,z_found_source)
deallocate(ispec_selected_source_all)
deallocate(xi_source_all,eta_source_all,gamma_source_all,final_distance_source_all)
deallocate(x_found_source_all,y_found_source_all,z_found_source_all)
- deallocate(xi_source_subset,eta_source_subset,gamma_source_subset)
- deallocate(x_found_source,y_found_source,z_found_source)
enddo ! end of loop over all source subsets
Copied: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_mesh_databases.F90 (from rev 22745, seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_mesh_databases.f90)
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_mesh_databases.F90 (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_mesh_databases.F90 2013-08-30 12:45:44 UTC (rev 22746)
@@ -0,0 +1,1241 @@
+!=====================================================================
+!
+! S p e c f e m 3 D G l o b e V e r s i o n 6 . 0
+! --------------------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA
+! and CNRS / INRIA / University of Pau, France
+! (c) Princeton University and CNRS / INRIA / University of Pau
+! August 2013
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+ subroutine read_mesh_databases()
+
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
+
+ implicit none
+
+ ! local parameters
+ ! timing
+ double precision, external :: wtime
+
+ ! get MPI starting time
+ time_start = wtime()
+
+#ifdef USE_SERIAL_CASCADE_FOR_IOs
+ ! serial i/o
+ you_can_start_doing_IOs = .false.
+ if (myrank > 0) call recv_singlel(you_can_start_doing_IOs,myrank-1,itag)
+#endif
+
+ ! start reading the databases
+ ! read arrays created by the mesher
+
+ ! reads "solver_data.bin" files for crust and mantle
+ call read_mesh_databases_CM()
+
+ ! reads "solver_data.bin" files for outer core
+ call read_mesh_databases_OC()
+
+ ! reads "solver_data.bin" files for inner core
+ call read_mesh_databases_IC()
+
+ ! reads "boundary.bin" files to couple mantle with outer core and inner core boundaries
+ if( ADIOS_ENABLED .and. ADIOS_FOR_ARRAYS_SOLVER ) then
+ call read_mesh_databases_coupling_adios()
+ else
+ call read_mesh_databases_coupling()
+ endif
+
+ ! reads "addressing.txt" 2-D addressing (needed for Stacey boundaries)
+ call read_mesh_databases_addressing()
+
+ ! sets up MPI interfaces, inner/outer elements and mesh coloring
+ call read_mesh_databases_MPI()
+
+ ! absorbing boundaries
+ if(ABSORBING_CONDITIONS) then
+ ! reads "stacey.bin" files
+ if( ADIOS_ENABLED .and. ADIOS_FOR_ARRAYS_SOLVER ) then
+ call read_mesh_databases_stacey_adios()
+ else
+ call read_mesh_databases_stacey()
+ endif
+ endif
+
+ ! kernels on regular grids
+ if (SAVE_REGULAR_KL) then
+ call read_mesh_databases_regular_kl()
+ endif
+
+#ifdef USE_SERIAL_CASCADE_FOR_IOs
+ ! serial i/o
+ you_can_start_doing_IOs = .true.
+ if (myrank < NPROC_XI_VAL*NPROC_ETA_VAL-1) call send_singlel(you_can_start_doing_IOs,myrank+1,itag)
+#endif
+
+ ! user output
+ call sync_all()
+ if( myrank == 0 ) then
+ ! elapsed time since beginning of mesh generation
+ tCPU = wtime() - time_start
+ write(IMAIN,*)
+ write(IMAIN,*) 'Elapsed time for reading mesh in seconds = ',sngl(tCPU)
+ write(IMAIN,*)
+ call flush_IMAIN()
+ endif
+
+ end subroutine read_mesh_databases
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine read_mesh_databases_CM()
+
+! mesh for CRUST MANTLE region
+
+ use specfem_par
+ use specfem_par_crustmantle
+ implicit none
+
+ ! local parameters
+ integer :: nspec_iso,nspec_tiso,nspec_ani
+ logical :: READ_KAPPA_MU,READ_TISO
+ ! dummy array that does not need to be actually read
+ integer, dimension(:),allocatable :: dummy_idoubling
+ integer :: ier
+
+ ! crust and mantle
+
+ if(ANISOTROPIC_3D_MANTLE_VAL) then
+ READ_KAPPA_MU = .false.
+ READ_TISO = .false.
+ nspec_iso = NSPECMAX_ISO_MANTLE ! 1
+ nspec_tiso = NSPECMAX_TISO_MANTLE ! 1
+ nspec_ani = NSPEC_CRUST_MANTLE
+ else
+ READ_KAPPA_MU = .true.
+ nspec_iso = NSPEC_CRUST_MANTLE
+ if(TRANSVERSE_ISOTROPY_VAL) then
+ nspec_tiso = NSPECMAX_TISO_MANTLE
+ else
+ nspec_tiso = 1
+ endif
+ nspec_ani = NSPECMAX_ANISO_MANTLE ! 1
+ READ_TISO = .true.
+ endif
+
+ ! sets number of top elements for surface movies & noise tomography
+ NSPEC_TOP = NSPEC2D_TOP(IREGION_CRUST_MANTLE)
+
+ ! allocates mass matrices in this slice (will be fully assembled in the solver)
+ !
+ ! in the case of Stacey boundary conditions, add C*deltat/2 contribution to the mass matrix
+ ! on Stacey edges for the crust_mantle and outer_core regions but not for the inner_core region
+ ! thus the mass matrix must be replaced by three mass matrices including the "C" damping matrix
+ !
+ ! if absorbing_conditions are not set or if NCHUNKS=6, only one mass matrix is needed
+ ! for the sake of performance, only "rmassz" array will be filled and "rmassx" & "rmassy" will be left unused
+
+ ! allocates dummy array
+ allocate(dummy_idoubling(NSPEC_CRUST_MANTLE),stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating dummy idoubling in crust_mantle')
+
+ ! allocates mass matrices
+ allocate(rmassx_crust_mantle(NGLOB_XY_CM), &
+ rmassy_crust_mantle(NGLOB_XY_CM),stat=ier)
+ if(ier /= 0) stop 'error allocating rmassx, rmassy in crust_mantle'
+
+ allocate(rmassz_crust_mantle(NGLOB_CRUST_MANTLE),stat=ier)
+ if(ier /= 0) stop 'error allocating rmassz in crust_mantle'
+
+ allocate(b_rmassx_crust_mantle(NGLOB_XY_CM), &
+ b_rmassy_crust_mantle(NGLOB_XY_CM),stat=ier)
+ if(ier /= 0) stop 'error allocating b_rmassx, b_rmassy in crust_mantle'
+
+ ! reads databases file
+ if( ADIOS_ENABLED .and. ADIOS_FOR_ARRAYS_SOLVER ) then
+ call read_arrays_solver_adios(IREGION_CRUST_MANTLE,myrank, &
+ NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE,NGLOB_XY_CM, &
+ nspec_iso,nspec_tiso,nspec_ani, &
+ rho_vp_crust_mantle,rho_vs_crust_mantle, &
+ xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
+ xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+ etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
+ gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
+ rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
+ kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
+ c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle, &
+ c14store_crust_mantle,c15store_crust_mantle,c16store_crust_mantle, &
+ c22store_crust_mantle,c23store_crust_mantle,c24store_crust_mantle, &
+ c25store_crust_mantle,c26store_crust_mantle,c33store_crust_mantle, &
+ c34store_crust_mantle,c35store_crust_mantle,c36store_crust_mantle, &
+ c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
+ c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
+ ibool_crust_mantle,dummy_idoubling,ispec_is_tiso_crust_mantle, &
+ rmassx_crust_mantle,rmassy_crust_mantle,rmassz_crust_mantle,rmass_ocean_load, &
+ READ_KAPPA_MU,READ_TISO, &
+ b_rmassx_crust_mantle,b_rmassy_crust_mantle)
+ else
+ call read_arrays_solver(IREGION_CRUST_MANTLE,myrank, &
+ NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE,NGLOB_XY_CM, &
+ nspec_iso,nspec_tiso,nspec_ani, &
+ rho_vp_crust_mantle,rho_vs_crust_mantle, &
+ xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
+ xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+ etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
+ gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
+ rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
+ kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
+ c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle, &
+ c14store_crust_mantle,c15store_crust_mantle,c16store_crust_mantle, &
+ c22store_crust_mantle,c23store_crust_mantle,c24store_crust_mantle, &
+ c25store_crust_mantle,c26store_crust_mantle,c33store_crust_mantle, &
+ c34store_crust_mantle,c35store_crust_mantle,c36store_crust_mantle, &
+ c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
+ c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
+ ibool_crust_mantle,dummy_idoubling,ispec_is_tiso_crust_mantle, &
+ rmassx_crust_mantle,rmassy_crust_mantle,rmassz_crust_mantle,rmass_ocean_load, &
+ READ_KAPPA_MU,READ_TISO, &
+ b_rmassx_crust_mantle,b_rmassy_crust_mantle)
+ endif
+
+ ! check that the number of points in this slice is correct
+ if(minval(ibool_crust_mantle(:,:,:,:)) /= 1) &
+ call exit_MPI(myrank,'incorrect global numbering: iboolmin is not equal to 1 in crust and mantle')
+ if(maxval(ibool_crust_mantle(:,:,:,:)) /= NGLOB_CRUST_MANTLE) &
+ call exit_MPI(myrank,'incorrect global numbering: iboolmax does not equal nglob in crust and mantle')
+
+ deallocate(dummy_idoubling)
+
+ end subroutine read_mesh_databases_CM
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine read_mesh_databases_OC()
+
+! mesh for OUTER CORE region
+
+ use specfem_par
+ use specfem_par_outercore
+
+ implicit none
+
+ ! local parameters
+ integer :: nspec_iso,nspec_tiso,nspec_ani,NGLOB_XY_dummy
+ logical :: READ_KAPPA_MU,READ_TISO
+ integer :: ier
+
+ ! dummy array that does not need to be actually read
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,1) :: dummy_array
+ real(kind=CUSTOM_REAL), dimension(:), allocatable :: dummy_rmass
+
+ logical, dimension(:), allocatable :: dummy_ispec_is_tiso
+ integer, dimension(:), allocatable :: dummy_idoubling_outer_core
+
+ ! outer core (no anisotropy nor S velocity)
+ ! rmass_ocean_load is not used in this routine because it is meaningless in the outer core
+ READ_KAPPA_MU = .false.
+ READ_TISO = .false.
+ nspec_iso = NSPEC_OUTER_CORE
+ nspec_tiso = 1
+ nspec_ani = 1
+
+ ! dummy allocation
+ NGLOB_XY_dummy = 1
+
+ allocate(dummy_rmass(NGLOB_XY_dummy), &
+ dummy_ispec_is_tiso(NSPEC_OUTER_CORE), &
+ dummy_idoubling_outer_core(NSPEC_OUTER_CORE), &
+ stat=ier)
+ if(ier /= 0) stop 'error allocating dummy rmass and dummy ispec/idoubling in outer core'
+
+ ! allocates mass matrices in this slice (will be fully assembled in the solver)
+ !
+ ! in the case of Stacey boundary conditions, add C*deltat/2 contribution to the mass matrix
+ ! on Stacey edges for the crust_mantle and outer_core regions but not for the inner_core region
+ ! thus the mass matrix must be replaced by three mass matrices including the "C" damping matrix
+ !
+ ! if absorbing_conditions are not set or if NCHUNKS=6, only one mass matrix is needed
+ ! for the sake of performance, only "rmassz" array will be filled and "rmassx" & "rmassy" will be left unused
+ allocate(rmass_outer_core(NGLOB_OUTER_CORE),stat=ier)
+ if(ier /= 0) stop 'error allocating rmass in outer core'
+
+ if( ADIOS_ENABLED .and. ADIOS_FOR_ARRAYS_SOLVER ) then
+ call read_arrays_solver_adios(IREGION_OUTER_CORE,myrank, &
+ NSPEC_OUTER_CORE,NGLOB_OUTER_CORE,NGLOB_XY_dummy, &
+ nspec_iso,nspec_tiso,nspec_ani, &
+ vp_outer_core,dummy_array, &
+ xstore_outer_core,ystore_outer_core,zstore_outer_core, &
+ xix_outer_core,xiy_outer_core,xiz_outer_core, &
+ etax_outer_core,etay_outer_core,etaz_outer_core, &
+ gammax_outer_core,gammay_outer_core,gammaz_outer_core, &
+ rhostore_outer_core,kappavstore_outer_core,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ ibool_outer_core,dummy_idoubling_outer_core,dummy_ispec_is_tiso, &
+ dummy_rmass,dummy_rmass,rmass_outer_core,rmass_ocean_load, &
+ READ_KAPPA_MU,READ_TISO, &
+ dummy_rmass,dummy_rmass)
+ else
+ call read_arrays_solver(IREGION_OUTER_CORE,myrank, &
+ NSPEC_OUTER_CORE,NGLOB_OUTER_CORE,NGLOB_XY_dummy, &
+ nspec_iso,nspec_tiso,nspec_ani, &
+ vp_outer_core,dummy_array, &
+ xstore_outer_core,ystore_outer_core,zstore_outer_core, &
+ xix_outer_core,xiy_outer_core,xiz_outer_core, &
+ etax_outer_core,etay_outer_core,etaz_outer_core, &
+ gammax_outer_core,gammay_outer_core,gammaz_outer_core, &
+ rhostore_outer_core,kappavstore_outer_core,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ ibool_outer_core,dummy_idoubling_outer_core,dummy_ispec_is_tiso, &
+ dummy_rmass,dummy_rmass,rmass_outer_core,rmass_ocean_load, &
+ READ_KAPPA_MU,READ_TISO, &
+ dummy_rmass,dummy_rmass)
+ endif
+
+ deallocate(dummy_idoubling_outer_core,dummy_ispec_is_tiso,dummy_rmass)
+
+ ! check that the number of points in this slice is correct
+ if(minval(ibool_outer_core(:,:,:,:)) /= 1) &
+ call exit_MPI(myrank,'incorrect global numbering: iboolmin is not equal to 1 in outer core')
+ if(maxval(ibool_outer_core(:,:,:,:)) /= NGLOB_OUTER_CORE) then
+ call exit_MPI(myrank, 'incorrect global numbering: &
+ & iboolmax does not equal nglob in outer core')
+ endif
+
+ end subroutine read_mesh_databases_OC
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine read_mesh_databases_IC()
+
+! mesh for INNER CORE region
+!
+! reads the mesher output arrays for the inner core (via ADIOS when enabled,
+! otherwise the plain binary reader) and allocates the inner-core mass matrices
+! (rmass, rmassx/rmassy and their backward-field counterparts b_rmassx/b_rmassy)
+
+ use specfem_par
+ use specfem_par_innercore
+ implicit none
+
+ ! local parameters
+ integer :: nspec_iso,nspec_tiso,nspec_ani
+ logical :: READ_KAPPA_MU,READ_TISO
+ integer :: ier
+
+ ! dummy array that does not need to be actually read
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,1) :: dummy_array
+ logical, dimension(:),allocatable:: dummy_ispec_is_tiso
+
+ ! inner core (no anisotropy)
+ ! rmass_ocean_load is not used in this routine because it is meaningless in the inner core
+ READ_KAPPA_MU = .true. ! (muvstore needed for attenuation)
+ READ_TISO = .false.
+ nspec_iso = NSPEC_INNER_CORE
+ ! transverse isotropy is never used in the inner core, so only a size-1 dummy is read
+ nspec_tiso = 1
+ if(ANISOTROPIC_INNER_CORE_VAL) then
+ nspec_ani = NSPEC_INNER_CORE
+ else
+ nspec_ani = 1
+ endif
+
+ allocate(dummy_ispec_is_tiso(NSPEC_INNER_CORE), &
+ stat=ier)
+ if(ier /= 0) stop 'error allocating dummy ispec in inner core'
+
+ ! allocates mass matrices in this slice (will be fully assembled in the solver)
+ !
+ ! in the case of Stacey boundary conditions, add C*deltat/2 contribution to the mass matrix
+ ! on Stacey edges for the crust_mantle and outer_core regions but not for the inner_core region
+ ! thus the mass matrix must be replaced by three mass matrices including the "C" damping matrix
+ !
+ ! if absorbing_conditions are not set or if NCHUNKS=6, only one mass matrix is needed
+ ! for the sake of performance, only "rmassz" array will be filled and "rmassx" & "rmassy" will be obsolete
+ !
+ ! NOTE(review): the comment block above appears copy-pasted from the crust-mantle
+ ! routine (it mentions "rmassz" which does not exist here); in this routine the
+ ! separate x/y matrices are rmassx_inner_core/rmassy_inner_core sized NGLOB_XY_IC
+ ! -- confirm the intended meaning against the crust-mantle counterpart
+ allocate(rmassx_inner_core(NGLOB_XY_IC), &
+ rmassy_inner_core(NGLOB_XY_IC),stat=ier)
+ if(ier /= 0) stop 'error allocating rmassx, rmassy in inner_core'
+
+ allocate(rmass_inner_core(NGLOB_INNER_CORE),stat=ier)
+ if(ier /= 0) stop 'error allocating rmass in inner core'
+
+ ! backward-field mass matrices (used by the reconstructed/backward wavefield)
+ allocate(b_rmassx_inner_core(NGLOB_XY_IC), &
+ b_rmassy_inner_core(NGLOB_XY_IC),stat=ier)
+ if(ier /= 0) stop 'error allocating b_rmassx, b_rmassy in inner_core'
+
+ ! reads in arrays
+ if( ADIOS_ENABLED .and. ADIOS_FOR_ARRAYS_SOLVER ) then
+ call read_arrays_solver_adios(IREGION_INNER_CORE,myrank, &
+ NSPEC_INNER_CORE,NGLOB_INNER_CORE,NGLOB_XY_IC, &
+ nspec_iso,nspec_tiso,nspec_ani, &
+ dummy_array,dummy_array, &
+ xstore_inner_core,ystore_inner_core,zstore_inner_core, &
+ xix_inner_core,xiy_inner_core,xiz_inner_core, &
+ etax_inner_core,etay_inner_core,etaz_inner_core, &
+ gammax_inner_core,gammay_inner_core,gammaz_inner_core, &
+ rhostore_inner_core,kappavstore_inner_core,muvstore_inner_core, &
+ dummy_array,dummy_array,dummy_array, &
+ c11store_inner_core,c12store_inner_core,c13store_inner_core, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,c33store_inner_core, &
+ dummy_array,dummy_array,dummy_array, &
+ c44store_inner_core,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ ibool_inner_core,idoubling_inner_core,dummy_ispec_is_tiso, &
+ rmassx_inner_core,rmassy_inner_core,rmass_inner_core,rmass_ocean_load, &
+ READ_KAPPA_MU,READ_TISO, &
+ b_rmassx_inner_core,b_rmassy_inner_core)
+ else
+ ! same argument list as the ADIOS variant, reading plain binary files instead
+ call read_arrays_solver(IREGION_INNER_CORE,myrank, &
+ NSPEC_INNER_CORE,NGLOB_INNER_CORE,NGLOB_XY_IC, &
+ nspec_iso,nspec_tiso,nspec_ani, &
+ dummy_array,dummy_array, &
+ xstore_inner_core,ystore_inner_core,zstore_inner_core, &
+ xix_inner_core,xiy_inner_core,xiz_inner_core, &
+ etax_inner_core,etay_inner_core,etaz_inner_core, &
+ gammax_inner_core,gammay_inner_core,gammaz_inner_core, &
+ rhostore_inner_core,kappavstore_inner_core,muvstore_inner_core, &
+ dummy_array,dummy_array,dummy_array, &
+ c11store_inner_core,c12store_inner_core,c13store_inner_core, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,c33store_inner_core, &
+ dummy_array,dummy_array,dummy_array, &
+ c44store_inner_core,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ ibool_inner_core,idoubling_inner_core,dummy_ispec_is_tiso, &
+ rmassx_inner_core,rmassy_inner_core,rmass_inner_core,rmass_ocean_load, &
+ READ_KAPPA_MU,READ_TISO, &
+ b_rmassx_inner_core,b_rmassy_inner_core)
+ endif
+
+ deallocate(dummy_ispec_is_tiso)
+
+ ! check that the number of points in this slice is correct
+ if(minval(ibool_inner_core(:,:,:,:)) /= 1 .or. maxval(ibool_inner_core(:,:,:,:)) /= NGLOB_INNER_CORE) &
+ call exit_MPI(myrank,'incorrect global numbering: iboolmax does not equal nglob in inner core')
+
+ end subroutine read_mesh_databases_IC
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine read_mesh_databases_coupling()
+
+! to couple mantle with outer core
+!
+! reads the "boundary.bin" files of the crust/mantle, outer-core and inner-core
+! regions (2D boundary element lists, normals and jacobians), and - for
+! SIMULATION_TYPE == 3 kernel runs with SAVE_BOUNDARY_MESH - the
+! "boundary_disc.bin" discontinuity arrays of the crust/mantle
+
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
+
+ implicit none
+
+ ! local parameters
+ integer :: njunk1,njunk2,njunk3
+ integer :: ier
+
+ ! user output
+ if( myrank == 0 ) write(IMAIN,*) 'reading coupling surfaces...'
+
+ ! crust and mantle
+ ! create name of database
+ call create_name_database(prname,myrank,IREGION_CRUST_MANTLE,LOCAL_PATH)
+
+ ! Stacey put back
+ ! NOTE(review): unit number 27 is hard-coded here rather than using the IIN
+ ! constant used elsewhere in this file -- presumably safe, but worth unifying
+ open(unit=27,file=prname(1:len_trim(prname))//'boundary.bin', &
+ status='old',form='unformatted',action='read',iostat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error opening crust_mantle boundary.bin file')
+
+ read(27) nspec2D_xmin_crust_mantle
+ read(27) nspec2D_xmax_crust_mantle
+ read(27) nspec2D_ymin_crust_mantle
+ read(27) nspec2D_ymax_crust_mantle
+ ! bottom/top counts are skipped on purpose (read into junk variables)
+ read(27) njunk1
+ read(27) njunk2
+
+! boundary parameters
+ read(27) ibelm_xmin_crust_mantle
+ read(27) ibelm_xmax_crust_mantle
+ read(27) ibelm_ymin_crust_mantle
+ read(27) ibelm_ymax_crust_mantle
+ read(27) ibelm_bottom_crust_mantle
+ read(27) ibelm_top_crust_mantle
+
+ read(27) normal_xmin_crust_mantle
+ read(27) normal_xmax_crust_mantle
+ read(27) normal_ymin_crust_mantle
+ read(27) normal_ymax_crust_mantle
+ read(27) normal_bottom_crust_mantle
+ read(27) normal_top_crust_mantle
+
+ read(27) jacobian2D_xmin_crust_mantle
+ read(27) jacobian2D_xmax_crust_mantle
+ read(27) jacobian2D_ymin_crust_mantle
+ read(27) jacobian2D_ymax_crust_mantle
+ read(27) jacobian2D_bottom_crust_mantle
+ read(27) jacobian2D_top_crust_mantle
+ close(27)
+
+ ! read parameters to couple fluid and solid regions
+ !
+ ! outer core
+
+ ! create name of database
+ call create_name_database(prname,myrank,IREGION_OUTER_CORE,LOCAL_PATH)
+
+ ! boundary parameters
+
+ ! Stacey put back
+ open(unit=27,file=prname(1:len_trim(prname))//'boundary.bin', &
+ status='old',form='unformatted',action='read',iostat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error opening outer_core boundary.bin file')
+
+ read(27) nspec2D_xmin_outer_core
+ read(27) nspec2D_xmax_outer_core
+ read(27) nspec2D_ymin_outer_core
+ read(27) nspec2D_ymax_outer_core
+ read(27) njunk1
+ read(27) njunk2
+
+ ! zmin count is taken from the compile-time mesh constants instead of the file
+ nspec2D_zmin_outer_core = NSPEC2D_BOTTOM(IREGION_OUTER_CORE)
+
+ read(27) ibelm_xmin_outer_core
+ read(27) ibelm_xmax_outer_core
+ read(27) ibelm_ymin_outer_core
+ read(27) ibelm_ymax_outer_core
+ read(27) ibelm_bottom_outer_core
+ read(27) ibelm_top_outer_core
+
+ read(27) normal_xmin_outer_core
+ read(27) normal_xmax_outer_core
+ read(27) normal_ymin_outer_core
+ read(27) normal_ymax_outer_core
+ read(27) normal_bottom_outer_core
+ read(27) normal_top_outer_core
+
+ read(27) jacobian2D_xmin_outer_core
+ read(27) jacobian2D_xmax_outer_core
+ read(27) jacobian2D_ymin_outer_core
+ read(27) jacobian2D_ymax_outer_core
+ read(27) jacobian2D_bottom_outer_core
+ read(27) jacobian2D_top_outer_core
+ close(27)
+
+ !
+ ! inner core
+ !
+
+ ! create name of database
+ call create_name_database(prname,myrank,IREGION_INNER_CORE,LOCAL_PATH)
+
+ ! read info for vertical edges for central cube matching in inner core
+ open(unit=27,file=prname(1:len_trim(prname))//'boundary.bin', &
+ status='old',form='unformatted',action='read',iostat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error opening inner_core boundary.bin file')
+
+ read(27) nspec2D_xmin_inner_core
+ read(27) nspec2D_xmax_inner_core
+ read(27) nspec2D_ymin_inner_core
+ read(27) nspec2D_ymax_inner_core
+ read(27) njunk1
+ read(27) njunk2
+
+ ! boundary parameters
+ ! (no normals/jacobians are read for the inner core, only element lists)
+ read(27) ibelm_xmin_inner_core
+ read(27) ibelm_xmax_inner_core
+ read(27) ibelm_ymin_inner_core
+ read(27) ibelm_ymax_inner_core
+ read(27) ibelm_bottom_inner_core
+ read(27) ibelm_top_inner_core
+ close(27)
+
+ ! -- Boundary Mesh for crust and mantle ---
+ if (SAVE_BOUNDARY_MESH .and. SIMULATION_TYPE == 3) then
+
+ call create_name_database(prname,myrank,IREGION_CRUST_MANTLE,LOCAL_PATH)
+
+ open(unit=27,file=prname(1:len_trim(prname))//'boundary_disc.bin', &
+ status='old',form='unformatted',action='read',iostat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error opening boundary_disc.bin file')
+
+ read(27) njunk1,njunk2,njunk3
+ ! NOTE(review): the .and. below means the check only fires when ALL three
+ ! counts are wrong; .or. would be stricter -- confirm intended semantics
+ if (njunk1 /= NSPEC2D_MOHO .and. njunk2 /= NSPEC2D_400 .and. njunk3 /= NSPEC2D_670) &
+ call exit_mpi(myrank, 'Error reading ibelm_disc.bin file')
+ read(27) ibelm_moho_top
+ read(27) ibelm_moho_bot
+ read(27) ibelm_400_top
+ read(27) ibelm_400_bot
+ read(27) ibelm_670_top
+ read(27) ibelm_670_bot
+ read(27) normal_moho
+ read(27) normal_400
+ read(27) normal_670
+ close(27)
+
+ k_top = 1
+ k_bot = NGLLZ
+
+ ! initialization
+ moho_kl = 0.; d400_kl = 0.; d670_kl = 0.; cmb_kl = 0.; icb_kl = 0.
+ endif
+
+ end subroutine read_mesh_databases_coupling
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
<doc_update>
+ subroutine read_mesh_databases_addressing()
+
+! reads OUTPUT_FILES/addressing.txt on the master process, broadcasts the
+! chunk/slice addressing to all processes, prints a topology map of the slices
+! (6-chunk runs with < 1000 processes only) and sets this process' chunk number
+
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
+
+ implicit none
+
+ ! local parameters
+ integer, dimension(NCHUNKS_VAL,0:NPROC_XI_VAL-1,0:NPROC_ETA_VAL-1) :: addressing
+ integer, dimension(0:NPROCTOT_VAL-1) :: ichunk_slice,iproc_xi_slice,iproc_eta_slice
+ integer :: ier,iproc,iproc_read,iproc_xi,iproc_eta
+
+ ! open file with global slice number addressing
+ ! (only the master reads the file; results are broadcast below)
+ if(myrank == 0) then
+ open(unit=IIN,file=trim(OUTPUT_FILES)//'/addressing.txt',status='old',action='read',iostat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error opening addressing.txt')
+
+ do iproc = 0,NPROCTOT_VAL-1
+ ! ichunk is a module variable (not declared locally) -- presumably from
+ ! specfem_par, since it is reused at the end of this routine
+ read(IIN,*) iproc_read,ichunk,iproc_xi,iproc_eta
+
+ if(iproc_read /= iproc) call exit_MPI(myrank,'incorrect slice number read')
+
+ addressing(ichunk,iproc_xi,iproc_eta) = iproc
+ ichunk_slice(iproc) = ichunk
+ iproc_xi_slice(iproc) = iproc_xi
+ iproc_eta_slice(iproc) = iproc_eta
+ enddo
+ close(IIN)
+ endif
+
+ ! broadcast the information read on the master to the nodes
+ call bcast_all_i(addressing,NCHUNKS_VAL*NPROC_XI_VAL*NPROC_ETA_VAL)
+ call bcast_all_i(ichunk_slice,NPROCTOT_VAL)
+ call bcast_all_i(iproc_xi_slice,NPROCTOT_VAL)
+ call bcast_all_i(iproc_eta_slice,NPROCTOT_VAL)
+
+ ! output a topology map of slices - fix 20x by nproc
+ if (myrank == 0 ) then
+ if( NCHUNKS_VAL == 6 .and. NPROCTOT_VAL < 1000 ) then
+ write(IMAIN,*) 'Spatial distribution of the slices'
+ ! chunk AB row
+ do iproc_xi = NPROC_XI_VAL-1, 0, -1
+ write(IMAIN,'(20x)',advance='no')
+ do iproc_eta = NPROC_ETA_VAL -1, 0, -1
+ ichunk = CHUNK_AB
+ write(IMAIN,'(i5)',advance='no') addressing(ichunk,iproc_xi,iproc_eta)
+ enddo
+ write(IMAIN,'(1x)',advance='yes')
+ enddo
+ write(IMAIN, *) ' '
+ ! middle band: chunks BC, AC and BC_ANTIPODE side by side
+ do iproc_xi = NPROC_XI_VAL-1, 0, -1
+ write(IMAIN,'(1x)',advance='no')
+ do iproc_eta = NPROC_ETA_VAL -1, 0, -1
+ ichunk = CHUNK_BC
+ write(IMAIN,'(i5)',advance='no') addressing(ichunk,iproc_xi,iproc_eta)
+ enddo
+ write(IMAIN,'(3x)',advance='no')
+ do iproc_eta = NPROC_ETA_VAL -1, 0, -1
+ ichunk = CHUNK_AC
+ write(IMAIN,'(i5)',advance='no') addressing(ichunk,iproc_xi,iproc_eta)
+ enddo
+ write(IMAIN,'(3x)',advance='no')
+ do iproc_eta = NPROC_ETA_VAL -1, 0, -1
+ ichunk = CHUNK_BC_ANTIPODE
+ write(IMAIN,'(i5)',advance='no') addressing(ichunk,iproc_xi,iproc_eta)
+ enddo
+ write(IMAIN,'(1x)',advance='yes')
+ enddo
+ write(IMAIN, *) ' '
+ ! chunk AB_ANTIPODE row
+ do iproc_xi = NPROC_XI_VAL-1, 0, -1
+ write(IMAIN,'(20x)',advance='no')
+ do iproc_eta = NPROC_ETA_VAL -1, 0, -1
+ ichunk = CHUNK_AB_ANTIPODE
+ write(IMAIN,'(i5)',advance='no') addressing(ichunk,iproc_xi,iproc_eta)
+ enddo
+ write(IMAIN,'(1x)',advance='yes')
+ enddo
+ write(IMAIN, *) ' '
+ ! chunk AC_ANTIPODE row
+ do iproc_xi = NPROC_XI_VAL-1, 0, -1
+ write(IMAIN,'(20x)',advance='no')
+ do iproc_eta = NPROC_ETA_VAL -1, 0, -1
+ ichunk = CHUNK_AC_ANTIPODE
+ write(IMAIN,'(i5)',advance='no') addressing(ichunk,iproc_xi,iproc_eta)
+ enddo
+ write(IMAIN,'(1x)',advance='yes')
+ enddo
+ write(IMAIN, *) ' '
+ endif
+ endif
+
+ ! determine chunk number and local slice coordinates using addressing
+ ! (needed for Stacey conditions)
+ ichunk = ichunk_slice(myrank)
+
+ end subroutine read_mesh_databases_addressing
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine read_mesh_databases_MPI()
+
+! reads the MPI interface descriptions for the three regions (dispatching to
+! the ADIOS or plain-binary readers), allocates the send/receive buffers and
+! request arrays, and reports the inner/outer element split used for
+! overlapping communications with computations
+
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
+ implicit none
+
+ ! local parameters
+ real :: percentage_edge
+ integer :: ier
+
+ ! read MPI interfaces from file
+
+ ! crust mantle
+ if( ADIOS_ENABLED .and. ADIOS_FOR_MPI_ARRAYS ) then
+ call read_mesh_databases_MPI_CM_adios()
+ else
+ call read_mesh_databases_MPI_CM()
+ endif
+
+ ! displacement is a 3-component (NDIM) vector field in the crust/mantle
+ allocate(buffer_send_vector_crust_mantle(NDIM,max_nibool_interfaces_cm,num_interfaces_crust_mantle), &
+ buffer_recv_vector_crust_mantle(NDIM,max_nibool_interfaces_cm,num_interfaces_crust_mantle), &
+ request_send_vector_cm(num_interfaces_crust_mantle), &
+ request_recv_vector_cm(num_interfaces_crust_mantle), &
+ stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating array buffer_send_vector_crust_mantle etc.')
+
+ ! extra buffers for the backward/reconstructed wavefield of kernel runs
+ if( SIMULATION_TYPE == 3 ) then
+ allocate(b_buffer_send_vector_cm(NDIM,max_nibool_interfaces_cm,num_interfaces_crust_mantle), &
+ b_buffer_recv_vector_cm(NDIM,max_nibool_interfaces_cm,num_interfaces_crust_mantle), &
+ b_request_send_vector_cm(num_interfaces_crust_mantle), &
+ b_request_recv_vector_cm(num_interfaces_crust_mantle), &
+ stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating array b_buffer_send_vector_cm etc.')
+ endif
+
+ ! outer core
+ if( ADIOS_ENABLED .and. ADIOS_FOR_MPI_ARRAYS ) then
+ call read_mesh_databases_MPI_OC_adios()
+ else
+ call read_mesh_databases_MPI_OC()
+ endif
+
+ ! the fluid outer core exchanges a scalar potential field
+ ! NOTE(review): the error message below says 'vector' although the arrays
+ ! are scalar buffers -- message text kept as-is
+ allocate(buffer_send_scalar_outer_core(max_nibool_interfaces_oc,num_interfaces_outer_core), &
+ buffer_recv_scalar_outer_core(max_nibool_interfaces_oc,num_interfaces_outer_core), &
+ request_send_scalar_oc(num_interfaces_outer_core), &
+ request_recv_scalar_oc(num_interfaces_outer_core), &
+ stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating array buffer_send_vector_outer_core etc.')
+
+ if( SIMULATION_TYPE == 3 ) then
+ allocate(b_buffer_send_scalar_outer_core(max_nibool_interfaces_oc,num_interfaces_outer_core), &
+ b_buffer_recv_scalar_outer_core(max_nibool_interfaces_oc,num_interfaces_outer_core), &
+ b_request_send_scalar_oc(num_interfaces_outer_core), &
+ b_request_recv_scalar_oc(num_interfaces_outer_core), &
+ stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating array b_buffer_send_vector_outer_core etc.')
+ endif
+
+ ! inner core
+ if( ADIOS_ENABLED .and. ADIOS_FOR_MPI_ARRAYS ) then
+ call read_mesh_databases_MPI_IC_adios()
+ else
+ call read_mesh_databases_MPI_IC()
+ endif
+
+ allocate(buffer_send_vector_inner_core(NDIM,max_nibool_interfaces_ic,num_interfaces_inner_core), &
+ buffer_recv_vector_inner_core(NDIM,max_nibool_interfaces_ic,num_interfaces_inner_core), &
+ request_send_vector_ic(num_interfaces_inner_core), &
+ request_recv_vector_ic(num_interfaces_inner_core), &
+ stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating array buffer_send_vector_inner_core etc.')
+
+ if( SIMULATION_TYPE == 3 ) then
+ allocate(b_buffer_send_vector_inner_core(NDIM,max_nibool_interfaces_ic,num_interfaces_inner_core), &
+ b_buffer_recv_vector_inner_core(NDIM,max_nibool_interfaces_ic,num_interfaces_inner_core), &
+ b_request_send_vector_ic(num_interfaces_inner_core), &
+ b_request_recv_vector_ic(num_interfaces_inner_core), &
+ stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating array b_buffer_send_vector_inner_core etc.')
+ endif
+
+
+ ! user output
+ if(myrank == 0) then
+ write(IMAIN,*) 'for overlapping of communications with calculations:'
+ write(IMAIN,*)
+
+ percentage_edge = 100. * nspec_outer_crust_mantle / real(NSPEC_CRUST_MANTLE)
+ write(IMAIN,*) 'percentage of edge elements in crust/mantle ',percentage_edge,'%'
+ write(IMAIN,*) 'percentage of volume elements in crust/mantle ',100. - percentage_edge,'%'
+ write(IMAIN,*)
+
+ percentage_edge = 100.* nspec_outer_outer_core / real(NSPEC_OUTER_CORE)
+ write(IMAIN,*) 'percentage of edge elements in outer core ',percentage_edge,'%'
+ write(IMAIN,*) 'percentage of volume elements in outer core ',100. - percentage_edge,'%'
+ write(IMAIN,*)
+
+ percentage_edge = 100. * nspec_outer_inner_core / real(NSPEC_INNER_CORE)
+ write(IMAIN,*) 'percentage of edge elements in inner core ',percentage_edge,'%'
+ write(IMAIN,*) 'percentage of volume elements in inner core ',100. - percentage_edge,'%'
+ write(IMAIN,*)
+ call flush_IMAIN()
+ endif
+ ! synchronizes MPI processes
+ call sync_all()
+
+ end subroutine read_mesh_databases_MPI
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine read_mesh_databases_MPI_CM()
+
+! reads "solver_data_mpi.bin" for the crust/mantle region: MPI interface
+! lists, inner/outer element phase arrays, and GPU mesh-coloring arrays
+
+ use specfem_par
+ use specfem_par_crustmantle
+ implicit none
+
+ ! local parameters
+ integer :: ier
+
+ ! crust mantle region
+
+ ! create the name for the database of the current slice and region
+ call create_name_database(prname,myrank,IREGION_CRUST_MANTLE,LOCAL_PATH)
+
+ open(unit=IIN,file=prname(1:len_trim(prname))//'solver_data_mpi.bin', &
+ status='old',action='read',form='unformatted',iostat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error opening solver_data_mpi.bin')
+
+ ! MPI interfaces
+ read(IIN) num_interfaces_crust_mantle
+ allocate(my_neighbours_crust_mantle(num_interfaces_crust_mantle), &
+ nibool_interfaces_crust_mantle(num_interfaces_crust_mantle), &
+ stat=ier)
+ if( ier /= 0 ) &
+ call exit_mpi(myrank,'error allocating array my_neighbours_crust_mantle etc.')
+
+ if( num_interfaces_crust_mantle > 0 ) then
+ read(IIN) max_nibool_interfaces_cm
+ allocate(ibool_interfaces_crust_mantle(max_nibool_interfaces_cm,num_interfaces_crust_mantle), &
+ stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating array ibool_interfaces_crust_mantle')
+
+ read(IIN) my_neighbours_crust_mantle
+ read(IIN) nibool_interfaces_crust_mantle
+ read(IIN) ibool_interfaces_crust_mantle
+ else
+ ! dummy array (zero-sized, so later code can reference it safely)
+ max_nibool_interfaces_cm = 0
+ allocate(ibool_interfaces_crust_mantle(0,0),stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating array dummy ibool_interfaces_crust_mantle')
+ endif
+
+ ! inner / outer elements
+ read(IIN) nspec_inner_crust_mantle,nspec_outer_crust_mantle
+ read(IIN) num_phase_ispec_crust_mantle
+ if( num_phase_ispec_crust_mantle < 0 ) &
+ call exit_mpi(myrank,'error num_phase_ispec_crust_mantle is < zero')
+
+ allocate(phase_ispec_inner_crust_mantle(num_phase_ispec_crust_mantle,2),&
+ stat=ier)
+ if( ier /= 0 ) &
+ call exit_mpi(myrank,'error allocating array phase_ispec_inner_crust_mantle')
+
+ if(num_phase_ispec_crust_mantle > 0 ) read(IIN) phase_ispec_inner_crust_mantle
+
+ ! mesh coloring for GPUs
+ if( USE_MESH_COLORING_GPU ) then
+ ! colors
+ read(IIN) num_colors_outer_crust_mantle,num_colors_inner_crust_mantle
+
+ allocate(num_elem_colors_crust_mantle(num_colors_outer_crust_mantle + num_colors_inner_crust_mantle), &
+ stat=ier)
+ if( ier /= 0 ) &
+ call exit_mpi(myrank,'error allocating num_elem_colors_crust_mantle array')
+
+ read(IIN) num_elem_colors_crust_mantle
+ else
+ ! allocates dummy arrays (zero-sized)
+ num_colors_outer_crust_mantle = 0
+ num_colors_inner_crust_mantle = 0
+ allocate(num_elem_colors_crust_mantle(num_colors_outer_crust_mantle + num_colors_inner_crust_mantle), &
+ stat=ier)
+ if( ier /= 0 ) &
+ call exit_mpi(myrank,'error allocating num_elem_colors_crust_mantle array')
+ endif
+
+ close(IIN)
+
+ end subroutine read_mesh_databases_MPI_CM
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine read_mesh_databases_MPI_OC()
+
+! reads "solver_data_mpi.bin" for the outer core region: MPI interface
+! lists, inner/outer element phase arrays, and GPU mesh-coloring arrays
+
+ use specfem_par
+ use specfem_par_outercore
+ implicit none
+
+ ! local parameters
+ integer :: ier
+
+ ! outer core region
+
+ ! create the name for the database of the current slice and region
+ call create_name_database(prname,myrank,IREGION_OUTER_CORE,LOCAL_PATH)
+
+ open(unit=IIN,file=prname(1:len_trim(prname))//'solver_data_mpi.bin', &
+ status='old',action='read',form='unformatted',iostat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error opening solver_data_mpi.bin')
+
+ ! MPI interfaces
+ read(IIN) num_interfaces_outer_core
+ allocate(my_neighbours_outer_core(num_interfaces_outer_core), &
+ nibool_interfaces_outer_core(num_interfaces_outer_core), &
+ stat=ier)
+ if( ier /= 0 ) &
+ call exit_mpi(myrank,'error allocating array my_neighbours_outer_core etc.')
+
+ if( num_interfaces_outer_core > 0 ) then
+ read(IIN) max_nibool_interfaces_oc
+ allocate(ibool_interfaces_outer_core(max_nibool_interfaces_oc,num_interfaces_outer_core), &
+ stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating array ibool_interfaces_outer_core')
+
+ read(IIN) my_neighbours_outer_core
+ read(IIN) nibool_interfaces_outer_core
+ read(IIN) ibool_interfaces_outer_core
+ else
+ ! dummy array (zero-sized, so later code can reference it safely)
+ max_nibool_interfaces_oc = 0
+ allocate(ibool_interfaces_outer_core(0,0),stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating array dummy ibool_interfaces_outer_core')
+ endif
+
+ ! inner / outer elements
+ read(IIN) nspec_inner_outer_core,nspec_outer_outer_core
+ read(IIN) num_phase_ispec_outer_core
+ if( num_phase_ispec_outer_core < 0 ) &
+ call exit_mpi(myrank,'error num_phase_ispec_outer_core is < zero')
+
+ allocate(phase_ispec_inner_outer_core(num_phase_ispec_outer_core,2),&
+ stat=ier)
+ if( ier /= 0 ) &
+ call exit_mpi(myrank,'error allocating array phase_ispec_inner_outer_core')
+
+ if(num_phase_ispec_outer_core > 0 ) read(IIN) phase_ispec_inner_outer_core
+
+ ! mesh coloring for GPUs
+ if( USE_MESH_COLORING_GPU ) then
+ ! colors
+ read(IIN) num_colors_outer_outer_core,num_colors_inner_outer_core
+
+ allocate(num_elem_colors_outer_core(num_colors_outer_outer_core + num_colors_inner_outer_core), &
+ stat=ier)
+ if( ier /= 0 ) &
+ call exit_mpi(myrank,'error allocating num_elem_colors_outer_core array')
+
+ read(IIN) num_elem_colors_outer_core
+ else
+ ! allocates dummy arrays (zero-sized)
+ num_colors_outer_outer_core = 0
+ num_colors_inner_outer_core = 0
+ allocate(num_elem_colors_outer_core(num_colors_outer_outer_core + num_colors_inner_outer_core), &
+ stat=ier)
+ if( ier /= 0 ) &
+ call exit_mpi(myrank,'error allocating num_elem_colors_outer_core array')
+ endif
+
+ close(IIN)
+
+ end subroutine read_mesh_databases_MPI_OC
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine read_mesh_databases_MPI_IC()
+
+! reads "solver_data_mpi.bin" for the inner core region: MPI interface
+! lists, inner/outer element phase arrays, and GPU mesh-coloring arrays
+
+ use specfem_par
+ use specfem_par_innercore
+ implicit none
+
+ ! local parameters
+ integer :: ier
+
+ ! inner core region
+
+ ! create the name for the database of the current slice and region
+ call create_name_database(prname,myrank,IREGION_INNER_CORE,LOCAL_PATH)
+
+ open(unit=IIN,file=prname(1:len_trim(prname))//'solver_data_mpi.bin', &
+ status='old',action='read',form='unformatted',iostat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error opening solver_data_mpi.bin')
+
+ ! MPI interfaces
+ read(IIN) num_interfaces_inner_core
+ allocate(my_neighbours_inner_core(num_interfaces_inner_core), &
+ nibool_interfaces_inner_core(num_interfaces_inner_core), &
+ stat=ier)
+ if( ier /= 0 ) &
+ call exit_mpi(myrank,'error allocating array my_neighbours_inner_core etc.')
+
+ if( num_interfaces_inner_core > 0 ) then
+ read(IIN) max_nibool_interfaces_ic
+ allocate(ibool_interfaces_inner_core(max_nibool_interfaces_ic,num_interfaces_inner_core), &
+ stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating array ibool_interfaces_inner_core')
+
+ read(IIN) my_neighbours_inner_core
+ read(IIN) nibool_interfaces_inner_core
+ read(IIN) ibool_interfaces_inner_core
+ else
+ ! dummy array (zero-sized, so later code can reference it safely)
+ max_nibool_interfaces_ic = 0
+ allocate(ibool_interfaces_inner_core(0,0),stat=ier)
+ if( ier /= 0 ) call exit_mpi(myrank,'error allocating array dummy ibool_interfaces_inner_core')
+ endif
+
+ ! inner / outer elements
+ read(IIN) nspec_inner_inner_core,nspec_outer_inner_core
+ read(IIN) num_phase_ispec_inner_core
+ if( num_phase_ispec_inner_core < 0 ) &
+ call exit_mpi(myrank,'error num_phase_ispec_inner_core is < zero')
+
+ allocate(phase_ispec_inner_inner_core(num_phase_ispec_inner_core,2),&
+ stat=ier)
+ if( ier /= 0 ) &
+ call exit_mpi(myrank,'error allocating array phase_ispec_inner_inner_core')
+
+ if(num_phase_ispec_inner_core > 0 ) read(IIN) phase_ispec_inner_inner_core
+
+ ! mesh coloring for GPUs
+ if( USE_MESH_COLORING_GPU ) then
+ ! colors
+ read(IIN) num_colors_outer_inner_core,num_colors_inner_inner_core
+
+ allocate(num_elem_colors_inner_core(num_colors_outer_inner_core + num_colors_inner_inner_core), &
+ stat=ier)
+ if( ier /= 0 ) &
+ call exit_mpi(myrank,'error allocating num_elem_colors_inner_core array')
+
+ read(IIN) num_elem_colors_inner_core
+ else
+ ! allocates dummy arrays (zero-sized)
+ num_colors_outer_inner_core = 0
+ num_colors_inner_inner_core = 0
+ allocate(num_elem_colors_inner_core(num_colors_outer_inner_core + num_colors_inner_inner_core), &
+ stat=ier)
+ if( ier /= 0 ) &
+ call exit_mpi(myrank,'error allocating num_elem_colors_inner_core array')
+ endif
+
+ close(IIN)
+
+ end subroutine read_mesh_databases_MPI_IC
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine read_mesh_databases_stacey()
+
+! reads the "stacey.bin" index arrays (nimin/nimax, njmin/njmax,
+! nkmin_xi/nkmin_eta) used by the Stacey absorbing boundary conditions,
+! for the crust/mantle and outer core regions (no Stacey arrays exist for
+! the inner core)
+
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
+
+ implicit none
+
+ ! local parameters
+ integer :: ier
+
+ ! crust and mantle
+
+ ! create name of database
+ call create_name_database(prname,myrank,IREGION_CRUST_MANTLE,LOCAL_PATH)
+
+ ! read arrays for Stacey conditions
+ open(unit=27,file=prname(1:len_trim(prname))//'stacey.bin', &
+ status='old',form='unformatted',action='read',iostat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error opening stacey.bin file for crust mantle')
+
+ read(27) nimin_crust_mantle
+ read(27) nimax_crust_mantle
+ read(27) njmin_crust_mantle
+ read(27) njmax_crust_mantle
+ read(27) nkmin_xi_crust_mantle
+ read(27) nkmin_eta_crust_mantle
+ close(27)
+
+ ! outer core
+
+ ! create name of database
+ call create_name_database(prname,myrank,IREGION_OUTER_CORE,LOCAL_PATH)
+
+ ! read arrays for Stacey conditions
+ open(unit=27,file=prname(1:len_trim(prname))//'stacey.bin', &
+ status='old',form='unformatted',action='read',iostat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error opening stacey.bin file for outer core')
+
+ read(27) nimin_outer_core
+ read(27) nimax_outer_core
+ read(27) njmin_outer_core
+ read(27) njmax_outer_core
+ read(27) nkmin_xi_outer_core
+ read(27) nkmin_eta_outer_core
+ close(27)
+
+ end subroutine read_mesh_databases_stacey
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine read_mesh_databases_regular_kl()
+
+! sets up the regular (lat/lon/layer) grid used for kernel output:
+! the master reads the grid definition, assigns every grid point to a slice,
+! sends each process its point list, and each process then locates its points
+! inside its crust/mantle elements (interpolation coefficients)
+
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
+
+ implicit none
+
+ ! local parameters
+ integer, dimension(:), allocatable :: slice_number
+ integer :: i,isp,ier
+ ! grid parameters
+ ! (sequence type: presumably to guarantee a contiguous memory layout for
+ ! passing the whole structure to read_kl_regular_grid -- confirm)
+ type kl_reg_grid_variables
+ sequence
+ real dlat
+ real dlon
+ integer nlayer
+ real rlayer(NM_KL_REG_LAYER)
+ integer ndoubling(NM_KL_REG_LAYER)
+ integer nlat(NM_KL_REG_LAYER)
+ integer nlon(NM_KL_REG_LAYER)
+ integer npts_total
+ integer npts_before_layer(NM_KL_REG_LAYER+1)
+ end type kl_reg_grid_variables
+ type (kl_reg_grid_variables) KL_REG_GRID
+
+ call read_kl_regular_grid(myrank, KL_REG_GRID)
+
+ if( myrank == 0 ) then
+ ! master process
+ allocate(slice_number(KL_REG_GRID%npts_total),stat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error allocating slice_number array')
+
+ ! print *, 'slice npts =', KL_REG_GRID%npts_total
+ call find_regular_grid_slice_number(slice_number, KL_REG_GRID, NCHUNKS_VAL, &
+ NPROC_XI_VAL, NPROC_ETA_VAL)
+
+ ! loop runs down to rank 0 last, so the master's own npoints_slice /
+ ! points_slice are left set for itself after the loop
+ do i = NPROCTOT_VAL-1,0,-1
+ npoints_slice = 0
+ do isp = 1,KL_REG_GRID%npts_total
+ if (slice_number(isp) == i) then
+ npoints_slice = npoints_slice + 1
+ if (npoints_slice > NM_KL_REG_PTS) stop 'Exceeding NM_KL_REG_PTS limit'
+ points_slice(npoints_slice) = isp
+ endif
+ enddo
+
+ ! send each non-master rank its point count and point list
+ ! (message tags: i for the count, 2*i for the list)
+ if (i /= 0) then
+ call send_singlei(npoints_slice,i,i)
+ if (npoints_slice > 0) then
+ call send_i(points_slice,npoints_slice,i,2*i)
+ endif
+ endif
+ enddo
+
+ open(unit=IOUT,file=trim(OUTPUT_FILES)//'/kl_grid_slice.txt',status='unknown',action='write',iostat=ier)
+ if( ier /= 0 ) call exit_MPI(myrank,'error opening file kl_grid_slice.txt for writing')
+ write(IOUT,*) slice_number
+ close(IOUT)
+
+ deallocate(slice_number)
+ else
+ ! slave processes
+ call recv_singlei(npoints_slice,0,myrank)
+ if (npoints_slice > 0) then
+ call recv_i(points_slice,npoints_slice,0,2*myrank)
+ endif
+ endif
+
+ ! this is the core part that takes up most of the computation time,
+ ! and presumably the more processors involved the faster.
+ if (npoints_slice > 0) then
+ call locate_regular_points(npoints_slice, points_slice, KL_REG_GRID, &
+ NEX_XI, NSPEC_CRUST_MANTLE, &
+ xstore_crust_mantle, ystore_crust_mantle, zstore_crust_mantle, &
+ ibool_crust_mantle, &
+ xigll, yigll, zigll, &
+ ispec_reg, hxir_reg, hetar_reg, hgammar_reg)
+ endif
+
+ ! user output
+ if (myrank==0) then
+ write(IMAIN,*) ' '
+ write(IMAIN,*) 'Finished locating kernel output regular grid'
+ write(IMAIN,*) ' '
+ call flush_IMAIN()
+ endif
+
+ end subroutine read_mesh_databases_regular_kl
+
+
Deleted: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_mesh_databases.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_mesh_databases.f90 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_mesh_databases.f90 2013-08-30 12:45:44 UTC (rev 22746)
@@ -1,1127 +0,0 @@
-!=====================================================================
-!
-! S p e c f e m 3 D G l o b e V e r s i o n 6 . 0
-! --------------------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA
-! and CNRS / INRIA / University of Pau, France
-! (c) Princeton University and CNRS / INRIA / University of Pau
-! August 2013
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
- subroutine read_mesh_databases()
-
- use specfem_par
- use specfem_par_crustmantle
- use specfem_par_innercore
- use specfem_par_outercore
-
- implicit none
-
- ! local parameters
- ! timing
- double precision, external :: wtime
-
- ! get MPI starting time
- time_start = wtime()
-
- ! start reading the databases
- ! read arrays created by the mesher
-
- ! reads "solver_data.bin" files for crust and mantle
- call read_mesh_databases_CM()
-
- ! reads "solver_data.bin" files for outer core
- call read_mesh_databases_OC()
-
- ! reads "solver_data.bin" files for inner core
- call read_mesh_databases_IC()
-
- ! reads "boundary.bin" files to couple mantle with outer core and inner core boundaries
- if( ADIOS_ENABLED .and. ADIOS_FOR_ARRAYS_SOLVER ) then
- call read_mesh_databases_coupling_adios()
- else
- call read_mesh_databases_coupling()
- endif
-
- ! reads "addressing.txt" 2-D addressing (needed for Stacey boundaries)
- call read_mesh_databases_addressing()
-
- ! sets up MPI interfaces, inner/outer elements and mesh coloring
- call read_mesh_databases_MPI()
-
- ! absorbing boundaries
- if(ABSORBING_CONDITIONS) then
- ! reads "stacey.bin" files
- if( ADIOS_ENABLED .and. ADIOS_FOR_ARRAYS_SOLVER ) then
- call read_mesh_databases_stacey_adios()
- else
- call read_mesh_databases_stacey()
- endif
- endif
-
- ! user output
- call sync_all()
- if( myrank == 0 ) then
- ! elapsed time since beginning of mesh generation
- tCPU = wtime() - time_start
- write(IMAIN,*)
- write(IMAIN,*) 'Elapsed time for reading mesh in seconds = ',sngl(tCPU)
- write(IMAIN,*)
- call flush_IMAIN()
- endif
-
- end subroutine read_mesh_databases
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine read_mesh_databases_CM()
-
-! mesh for CRUST MANTLE region
-
- use specfem_par
- use specfem_par_crustmantle
- implicit none
-
- ! local parameters
- integer :: nspec_iso,nspec_tiso,nspec_ani
- logical :: READ_KAPPA_MU,READ_TISO
- ! dummy array that does not need to be actually read
- integer, dimension(:),allocatable :: dummy_idoubling
- integer :: ier
-
- ! crust and mantle
-
- if(ANISOTROPIC_3D_MANTLE_VAL) then
- READ_KAPPA_MU = .false.
- READ_TISO = .false.
- nspec_iso = NSPECMAX_ISO_MANTLE ! 1
- nspec_tiso = NSPECMAX_TISO_MANTLE ! 1
- nspec_ani = NSPEC_CRUST_MANTLE
- else
- READ_KAPPA_MU = .true.
- nspec_iso = NSPEC_CRUST_MANTLE
- if(TRANSVERSE_ISOTROPY_VAL) then
- nspec_tiso = NSPECMAX_TISO_MANTLE
- else
- nspec_tiso = 1
- endif
- nspec_ani = NSPECMAX_ANISO_MANTLE ! 1
- READ_TISO = .true.
- endif
-
- ! sets number of top elements for surface movies & noise tomography
- NSPEC_TOP = NSPEC2D_TOP(IREGION_CRUST_MANTLE)
-
- ! allocates mass matrices in this slice (will be fully assembled in the solver)
- !
- ! in the case of Stacey boundary conditions, add C*deltat/2 contribution to the mass matrix
- ! on Stacey edges for the crust_mantle and outer_core regions but not for the inner_core region
- ! thus the mass matrix must be replaced by three mass matrices including the "C" damping matrix
- !
- ! if absorbing_conditions are not set or if NCHUNKS=6, only one mass matrix is needed
- ! for the sake of performance, only "rmassz" array will be filled and "rmassx" & "rmassy" will be obsolete
-
- ! allocates dummy array
- allocate(dummy_idoubling(NSPEC_CRUST_MANTLE),stat=ier)
- if( ier /= 0 ) call exit_mpi(myrank,'error allocating dummy idoubling in crust_mantle')
-
- ! allocates mass matrices
- allocate(rmassx_crust_mantle(NGLOB_XY_CM), &
- rmassy_crust_mantle(NGLOB_XY_CM),stat=ier)
- if(ier /= 0) stop 'error allocating rmassx, rmassy in crust_mantle'
-
- allocate(rmassz_crust_mantle(NGLOB_CRUST_MANTLE),stat=ier)
- if(ier /= 0) stop 'error allocating rmassz in crust_mantle'
-
- allocate(b_rmassx_crust_mantle(NGLOB_XY_CM), &
- b_rmassy_crust_mantle(NGLOB_XY_CM),stat=ier)
- if(ier /= 0) stop 'error allocating b_rmassx, b_rmassy in crust_mantle'
-
- ! reads databases file
- if( ADIOS_ENABLED .and. ADIOS_FOR_ARRAYS_SOLVER ) then
- call read_arrays_solver_adios(IREGION_CRUST_MANTLE,myrank, &
- NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE,NGLOB_XY_CM, &
- nspec_iso,nspec_tiso,nspec_ani, &
- rho_vp_crust_mantle,rho_vs_crust_mantle, &
- xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
- rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
- kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle, &
- c14store_crust_mantle,c15store_crust_mantle,c16store_crust_mantle, &
- c22store_crust_mantle,c23store_crust_mantle,c24store_crust_mantle, &
- c25store_crust_mantle,c26store_crust_mantle,c33store_crust_mantle, &
- c34store_crust_mantle,c35store_crust_mantle,c36store_crust_mantle, &
- c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- ibool_crust_mantle,dummy_idoubling,ispec_is_tiso_crust_mantle, &
- rmassx_crust_mantle,rmassy_crust_mantle,rmassz_crust_mantle,rmass_ocean_load, &
- READ_KAPPA_MU,READ_TISO, &
- b_rmassx_crust_mantle,b_rmassy_crust_mantle)
- else
- call read_arrays_solver(IREGION_CRUST_MANTLE,myrank, &
- NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE,NGLOB_XY_CM, &
- nspec_iso,nspec_tiso,nspec_ani, &
- rho_vp_crust_mantle,rho_vs_crust_mantle, &
- xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
- rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
- kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle, &
- c14store_crust_mantle,c15store_crust_mantle,c16store_crust_mantle, &
- c22store_crust_mantle,c23store_crust_mantle,c24store_crust_mantle, &
- c25store_crust_mantle,c26store_crust_mantle,c33store_crust_mantle, &
- c34store_crust_mantle,c35store_crust_mantle,c36store_crust_mantle, &
- c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- ibool_crust_mantle,dummy_idoubling,ispec_is_tiso_crust_mantle, &
- rmassx_crust_mantle,rmassy_crust_mantle,rmassz_crust_mantle,rmass_ocean_load, &
- READ_KAPPA_MU,READ_TISO, &
- b_rmassx_crust_mantle,b_rmassy_crust_mantle)
- endif
-
- ! check that the number of points in this slice is correct
- if(minval(ibool_crust_mantle(:,:,:,:)) /= 1) &
- call exit_MPI(myrank,'incorrect global numbering: iboolmin is not equal to 1 in crust and mantle')
- if(maxval(ibool_crust_mantle(:,:,:,:)) /= NGLOB_CRUST_MANTLE) &
- call exit_MPI(myrank,'incorrect global numbering: iboolmax does not equal nglob in crust and mantle')
-
- deallocate(dummy_idoubling)
-
- end subroutine read_mesh_databases_CM
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine read_mesh_databases_OC()
-
-! mesh for OUTER CORE region
-
- use specfem_par
- use specfem_par_outercore
-
- implicit none
-
- ! local parameters
- integer :: nspec_iso,nspec_tiso,nspec_ani,NGLOB_XY_dummy
- logical :: READ_KAPPA_MU,READ_TISO
- integer :: ier
-
- ! dummy array that does not need to be actually read
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,1) :: dummy_array
- real(kind=CUSTOM_REAL), dimension(:), allocatable :: dummy_rmass
-
- logical, dimension(:), allocatable :: dummy_ispec_is_tiso
- integer, dimension(:), allocatable :: dummy_idoubling_outer_core
-
- ! outer core (no anisotropy nor S velocity)
- ! rmass_ocean_load is not used in this routine because it is meaningless in the outer core
- READ_KAPPA_MU = .false.
- READ_TISO = .false.
- nspec_iso = NSPEC_OUTER_CORE
- nspec_tiso = 1
- nspec_ani = 1
-
- ! dummy allocation
- NGLOB_XY_dummy = 1
-
- allocate(dummy_rmass(NGLOB_XY_dummy), &
- dummy_ispec_is_tiso(NSPEC_OUTER_CORE), &
- dummy_idoubling_outer_core(NSPEC_OUTER_CORE), &
- stat=ier)
- if(ier /= 0) stop 'error allocating dummy rmass and dummy ispec/idoubling in outer core'
-
- ! allocates mass matrices in this slice (will be fully assembled in the solver)
- !
- ! in the case of Stacey boundary conditions, add C*deltat/2 contribution to the mass matrix
- ! on Stacey edges for the crust_mantle and outer_core regions but not for the inner_core region
- ! thus the mass matrix must be replaced by three mass matrices including the "C" damping matrix
- !
- ! if absorbing_conditions are not set or if NCHUNKS=6, only one mass matrix is needed
- ! for the sake of performance, only "rmassz" array will be filled and "rmassx" & "rmassy" will be obsolete
- allocate(rmass_outer_core(NGLOB_OUTER_CORE),stat=ier)
- if(ier /= 0) stop 'error allocating rmass in outer core'
-
- if( ADIOS_ENABLED .and. ADIOS_FOR_ARRAYS_SOLVER ) then
- call read_arrays_solver_adios(IREGION_OUTER_CORE,myrank, &
- NSPEC_OUTER_CORE,NGLOB_OUTER_CORE,NGLOB_XY_dummy, &
- nspec_iso,nspec_tiso,nspec_ani, &
- vp_outer_core,dummy_array, &
- xstore_outer_core,ystore_outer_core,zstore_outer_core, &
- xix_outer_core,xiy_outer_core,xiz_outer_core, &
- etax_outer_core,etay_outer_core,etaz_outer_core, &
- gammax_outer_core,gammay_outer_core,gammaz_outer_core, &
- rhostore_outer_core,kappavstore_outer_core,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- ibool_outer_core,dummy_idoubling_outer_core,dummy_ispec_is_tiso, &
- dummy_rmass,dummy_rmass,rmass_outer_core,rmass_ocean_load, &
- READ_KAPPA_MU,READ_TISO, &
- dummy_rmass,dummy_rmass)
- else
- call read_arrays_solver(IREGION_OUTER_CORE,myrank, &
- NSPEC_OUTER_CORE,NGLOB_OUTER_CORE,NGLOB_XY_dummy, &
- nspec_iso,nspec_tiso,nspec_ani, &
- vp_outer_core,dummy_array, &
- xstore_outer_core,ystore_outer_core,zstore_outer_core, &
- xix_outer_core,xiy_outer_core,xiz_outer_core, &
- etax_outer_core,etay_outer_core,etaz_outer_core, &
- gammax_outer_core,gammay_outer_core,gammaz_outer_core, &
- rhostore_outer_core,kappavstore_outer_core,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- ibool_outer_core,dummy_idoubling_outer_core,dummy_ispec_is_tiso, &
- dummy_rmass,dummy_rmass,rmass_outer_core,rmass_ocean_load, &
- READ_KAPPA_MU,READ_TISO, &
- dummy_rmass,dummy_rmass)
- endif
-
- deallocate(dummy_idoubling_outer_core,dummy_ispec_is_tiso,dummy_rmass)
-
- ! check that the number of points in this slice is correct
- ! check that the number of points in this slice is correct
- if(minval(ibool_outer_core(:,:,:,:)) /= 1) &
- call exit_MPI(myrank,'incorrect global numbering: iboolmin is not equal to 1 in outer core')
- if(maxval(ibool_outer_core(:,:,:,:)) /= NGLOB_OUTER_CORE) then
- call exit_MPI(myrank, 'incorrect global numbering: &
- & iboolmax does not equal nglob in outer core')
- endif
-
- end subroutine read_mesh_databases_OC
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine read_mesh_databases_IC()
-
-! mesh for INNER CORE region
-
- use specfem_par
- use specfem_par_innercore
- implicit none
-
- ! local parameters
- integer :: nspec_iso,nspec_tiso,nspec_ani
- logical :: READ_KAPPA_MU,READ_TISO
- integer :: ier
-
- ! dummy array that does not need to be actually read
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,1) :: dummy_array
- logical, dimension(:),allocatable:: dummy_ispec_is_tiso
-
- ! inner core (no anisotropy)
- ! rmass_ocean_load is not used in this routine because it is meaningless in the inner core
- READ_KAPPA_MU = .true. ! (muvstore needed for attenuation)
- READ_TISO = .false.
- nspec_iso = NSPEC_INNER_CORE
- nspec_tiso = 1
- if(ANISOTROPIC_INNER_CORE_VAL) then
- nspec_ani = NSPEC_INNER_CORE
- else
- nspec_ani = 1
- endif
-
- allocate(dummy_ispec_is_tiso(NSPEC_INNER_CORE), &
- stat=ier)
- if(ier /= 0) stop 'error allocating dummy ispec in inner core'
-
- ! allocates mass matrices in this slice (will be fully assembled in the solver)
- !
- ! in the case of Stacey boundary conditions, add C*deltat/2 contribution to the mass matrix
- ! on Stacey edges for the crust_mantle and outer_core regions but not for the inner_core region
- ! thus the mass matrix must be replaced by three mass matrices including the "C" damping matrix
- !
- ! if absorbing_conditions are not set or if NCHUNKS=6, only one mass matrix is needed
- ! for the sake of performance, only "rmassz" array will be filled and "rmassx" & "rmassy" will be obsolete
- allocate(rmassx_inner_core(NGLOB_XY_IC), &
- rmassy_inner_core(NGLOB_XY_IC),stat=ier)
- if(ier /= 0) stop 'error allocating rmassx, rmassy in inner_core'
-
- allocate(rmass_inner_core(NGLOB_INNER_CORE),stat=ier)
- if(ier /= 0) stop 'error allocating rmass in inner core'
-
- allocate(b_rmassx_inner_core(NGLOB_XY_IC), &
- b_rmassy_inner_core(NGLOB_XY_IC),stat=ier)
- if(ier /= 0) stop 'error allocating b_rmassx, b_rmassy in inner_core'
-
- ! reads in arrays
- if( ADIOS_ENABLED .and. ADIOS_FOR_ARRAYS_SOLVER ) then
- call read_arrays_solver_adios(IREGION_INNER_CORE,myrank, &
- NSPEC_INNER_CORE,NGLOB_INNER_CORE,NGLOB_XY_IC, &
- nspec_iso,nspec_tiso,nspec_ani, &
- dummy_array,dummy_array, &
- xstore_inner_core,ystore_inner_core,zstore_inner_core, &
- xix_inner_core,xiy_inner_core,xiz_inner_core, &
- etax_inner_core,etay_inner_core,etaz_inner_core, &
- gammax_inner_core,gammay_inner_core,gammaz_inner_core, &
- rhostore_inner_core,kappavstore_inner_core,muvstore_inner_core, &
- dummy_array,dummy_array,dummy_array, &
- c11store_inner_core,c12store_inner_core,c13store_inner_core, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,c33store_inner_core, &
- dummy_array,dummy_array,dummy_array, &
- c44store_inner_core,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- ibool_inner_core,idoubling_inner_core,dummy_ispec_is_tiso, &
- rmassx_inner_core,rmassy_inner_core,rmass_inner_core,rmass_ocean_load, &
- READ_KAPPA_MU,READ_TISO, &
- b_rmassx_inner_core,b_rmassy_inner_core)
- else
- call read_arrays_solver(IREGION_INNER_CORE,myrank, &
- NSPEC_INNER_CORE,NGLOB_INNER_CORE,NGLOB_XY_IC, &
- nspec_iso,nspec_tiso,nspec_ani, &
- dummy_array,dummy_array, &
- xstore_inner_core,ystore_inner_core,zstore_inner_core, &
- xix_inner_core,xiy_inner_core,xiz_inner_core, &
- etax_inner_core,etay_inner_core,etaz_inner_core, &
- gammax_inner_core,gammay_inner_core,gammaz_inner_core, &
- rhostore_inner_core,kappavstore_inner_core,muvstore_inner_core, &
- dummy_array,dummy_array,dummy_array, &
- c11store_inner_core,c12store_inner_core,c13store_inner_core, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,c33store_inner_core, &
- dummy_array,dummy_array,dummy_array, &
- c44store_inner_core,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- ibool_inner_core,idoubling_inner_core,dummy_ispec_is_tiso, &
- rmassx_inner_core,rmassy_inner_core,rmass_inner_core,rmass_ocean_load, &
- READ_KAPPA_MU,READ_TISO, &
- b_rmassx_inner_core,b_rmassy_inner_core)
- endif
-
- deallocate(dummy_ispec_is_tiso)
-
- ! check that the number of points in this slice is correct
- if(minval(ibool_inner_core(:,:,:,:)) /= 1 .or. maxval(ibool_inner_core(:,:,:,:)) /= NGLOB_INNER_CORE) &
- call exit_MPI(myrank,'incorrect global numbering: iboolmax does not equal nglob in inner core')
-
- end subroutine read_mesh_databases_IC
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine read_mesh_databases_coupling()
-
-! to couple mantle with outer core
-
- use specfem_par
- use specfem_par_crustmantle
- use specfem_par_innercore
- use specfem_par_outercore
-
- implicit none
-
- ! local parameters
- integer :: njunk1,njunk2,njunk3
- integer :: ier
-
- ! user output
- if( myrank == 0 ) write(IMAIN,*) 'reading coupling surfaces...'
-
- ! crust and mantle
- ! create name of database
- call create_name_database(prname,myrank,IREGION_CRUST_MANTLE,LOCAL_PATH)
-
- ! Stacey put back
- open(unit=27,file=prname(1:len_trim(prname))//'boundary.bin', &
- status='old',form='unformatted',action='read',iostat=ier)
- if( ier /= 0 ) call exit_mpi(myrank,'error opening crust_mantle boundary.bin file')
-
- read(27) nspec2D_xmin_crust_mantle
- read(27) nspec2D_xmax_crust_mantle
- read(27) nspec2D_ymin_crust_mantle
- read(27) nspec2D_ymax_crust_mantle
- read(27) njunk1
- read(27) njunk2
-
-! boundary parameters
- read(27) ibelm_xmin_crust_mantle
- read(27) ibelm_xmax_crust_mantle
- read(27) ibelm_ymin_crust_mantle
- read(27) ibelm_ymax_crust_mantle
- read(27) ibelm_bottom_crust_mantle
- read(27) ibelm_top_crust_mantle
-
- read(27) normal_xmin_crust_mantle
- read(27) normal_xmax_crust_mantle
- read(27) normal_ymin_crust_mantle
- read(27) normal_ymax_crust_mantle
- read(27) normal_bottom_crust_mantle
- read(27) normal_top_crust_mantle
-
- read(27) jacobian2D_xmin_crust_mantle
- read(27) jacobian2D_xmax_crust_mantle
- read(27) jacobian2D_ymin_crust_mantle
- read(27) jacobian2D_ymax_crust_mantle
- read(27) jacobian2D_bottom_crust_mantle
- read(27) jacobian2D_top_crust_mantle
- close(27)
-
- ! read parameters to couple fluid and solid regions
- !
- ! outer core
-
- ! create name of database
- call create_name_database(prname,myrank,IREGION_OUTER_CORE,LOCAL_PATH)
-
- ! boundary parameters
-
- ! Stacey put back
- open(unit=27,file=prname(1:len_trim(prname))//'boundary.bin', &
- status='old',form='unformatted',action='read',iostat=ier)
- if( ier /= 0 ) call exit_mpi(myrank,'error opening outer_core boundary.bin file')
-
- read(27) nspec2D_xmin_outer_core
- read(27) nspec2D_xmax_outer_core
- read(27) nspec2D_ymin_outer_core
- read(27) nspec2D_ymax_outer_core
- read(27) njunk1
- read(27) njunk2
-
- nspec2D_zmin_outer_core = NSPEC2D_BOTTOM(IREGION_OUTER_CORE)
-
- read(27) ibelm_xmin_outer_core
- read(27) ibelm_xmax_outer_core
- read(27) ibelm_ymin_outer_core
- read(27) ibelm_ymax_outer_core
- read(27) ibelm_bottom_outer_core
- read(27) ibelm_top_outer_core
-
- read(27) normal_xmin_outer_core
- read(27) normal_xmax_outer_core
- read(27) normal_ymin_outer_core
- read(27) normal_ymax_outer_core
- read(27) normal_bottom_outer_core
- read(27) normal_top_outer_core
-
- read(27) jacobian2D_xmin_outer_core
- read(27) jacobian2D_xmax_outer_core
- read(27) jacobian2D_ymin_outer_core
- read(27) jacobian2D_ymax_outer_core
- read(27) jacobian2D_bottom_outer_core
- read(27) jacobian2D_top_outer_core
- close(27)
-
- !
- ! inner core
- !
-
- ! create name of database
- call create_name_database(prname,myrank,IREGION_INNER_CORE,LOCAL_PATH)
-
- ! read info for vertical edges for central cube matching in inner core
- open(unit=27,file=prname(1:len_trim(prname))//'boundary.bin', &
- status='old',form='unformatted',action='read',iostat=ier)
- if( ier /= 0 ) call exit_mpi(myrank,'error opening inner_core boundary.bin file')
-
- read(27) nspec2D_xmin_inner_core
- read(27) nspec2D_xmax_inner_core
- read(27) nspec2D_ymin_inner_core
- read(27) nspec2D_ymax_inner_core
- read(27) njunk1
- read(27) njunk2
-
- ! boundary parameters
- read(27) ibelm_xmin_inner_core
- read(27) ibelm_xmax_inner_core
- read(27) ibelm_ymin_inner_core
- read(27) ibelm_ymax_inner_core
- read(27) ibelm_bottom_inner_core
- read(27) ibelm_top_inner_core
- close(27)
-
- ! -- Boundary Mesh for crust and mantle ---
- if (SAVE_BOUNDARY_MESH .and. SIMULATION_TYPE == 3) then
-
- call create_name_database(prname,myrank,IREGION_CRUST_MANTLE,LOCAL_PATH)
-
- open(unit=27,file=prname(1:len_trim(prname))//'boundary_disc.bin', &
- status='old',form='unformatted',action='read',iostat=ier)
- if( ier /= 0 ) call exit_mpi(myrank,'error opening boundary_disc.bin file')
-
- read(27) njunk1,njunk2,njunk3
- if (njunk1 /= NSPEC2D_MOHO .and. njunk2 /= NSPEC2D_400 .and. njunk3 /= NSPEC2D_670) &
- call exit_mpi(myrank, 'Error reading ibelm_disc.bin file')
- read(27) ibelm_moho_top
- read(27) ibelm_moho_bot
- read(27) ibelm_400_top
- read(27) ibelm_400_bot
- read(27) ibelm_670_top
- read(27) ibelm_670_bot
- read(27) normal_moho
- read(27) normal_400
- read(27) normal_670
- close(27)
-
- k_top = 1
- k_bot = NGLLZ
-
- ! initialization
- moho_kl = 0.; d400_kl = 0.; d670_kl = 0.; cmb_kl = 0.; icb_kl = 0.
- endif
-
- end subroutine read_mesh_databases_coupling
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine read_mesh_databases_addressing()
-
- use specfem_par
- use specfem_par_crustmantle
- use specfem_par_innercore
- use specfem_par_outercore
-
- implicit none
-
- ! local parameters
- integer, dimension(NCHUNKS_VAL,0:NPROC_XI_VAL-1,0:NPROC_ETA_VAL-1) :: addressing
- integer, dimension(0:NPROCTOT_VAL-1) :: ichunk_slice,iproc_xi_slice,iproc_eta_slice
- integer :: ier,iproc,iproc_read,iproc_xi,iproc_eta
-
- ! open file with global slice number addressing
- if(myrank == 0) then
- open(unit=IIN,file=trim(OUTPUT_FILES)//'/addressing.txt',status='old',action='read',iostat=ier)
- if( ier /= 0 ) call exit_mpi(myrank,'error opening addressing.txt')
-
- do iproc = 0,NPROCTOT_VAL-1
- read(IIN,*) iproc_read,ichunk,iproc_xi,iproc_eta
-
- if(iproc_read /= iproc) call exit_MPI(myrank,'incorrect slice number read')
-
- addressing(ichunk,iproc_xi,iproc_eta) = iproc
- ichunk_slice(iproc) = ichunk
- iproc_xi_slice(iproc) = iproc_xi
- iproc_eta_slice(iproc) = iproc_eta
- enddo
- close(IIN)
- endif
-
- ! broadcast the information read on the master to the nodes
- call bcast_all_i(addressing,NCHUNKS_VAL*NPROC_XI_VAL*NPROC_ETA_VAL)
- call bcast_all_i(ichunk_slice,NPROCTOT_VAL)
- call bcast_all_i(iproc_xi_slice,NPROCTOT_VAL)
- call bcast_all_i(iproc_eta_slice,NPROCTOT_VAL)
-
- ! output a topology map of slices - fix 20x by nproc
- if (myrank == 0 ) then
- if( NCHUNKS_VAL == 6 .and. NPROCTOT_VAL < 1000 ) then
- write(IMAIN,*) 'Spatial distribution of the slices'
- do iproc_xi = NPROC_XI_VAL-1, 0, -1
- write(IMAIN,'(20x)',advance='no')
- do iproc_eta = NPROC_ETA_VAL -1, 0, -1
- ichunk = CHUNK_AB
- write(IMAIN,'(i5)',advance='no') addressing(ichunk,iproc_xi,iproc_eta)
- enddo
- write(IMAIN,'(1x)',advance='yes')
- enddo
- write(IMAIN, *) ' '
- do iproc_xi = NPROC_XI_VAL-1, 0, -1
- write(IMAIN,'(1x)',advance='no')
- do iproc_eta = NPROC_ETA_VAL -1, 0, -1
- ichunk = CHUNK_BC
- write(IMAIN,'(i5)',advance='no') addressing(ichunk,iproc_xi,iproc_eta)
- enddo
- write(IMAIN,'(3x)',advance='no')
- do iproc_eta = NPROC_ETA_VAL -1, 0, -1
- ichunk = CHUNK_AC
- write(IMAIN,'(i5)',advance='no') addressing(ichunk,iproc_xi,iproc_eta)
- enddo
- write(IMAIN,'(3x)',advance='no')
- do iproc_eta = NPROC_ETA_VAL -1, 0, -1
- ichunk = CHUNK_BC_ANTIPODE
- write(IMAIN,'(i5)',advance='no') addressing(ichunk,iproc_xi,iproc_eta)
- enddo
- write(IMAIN,'(1x)',advance='yes')
- enddo
- write(IMAIN, *) ' '
- do iproc_xi = NPROC_XI_VAL-1, 0, -1
- write(IMAIN,'(20x)',advance='no')
- do iproc_eta = NPROC_ETA_VAL -1, 0, -1
- ichunk = CHUNK_AB_ANTIPODE
- write(IMAIN,'(i5)',advance='no') addressing(ichunk,iproc_xi,iproc_eta)
- enddo
- write(IMAIN,'(1x)',advance='yes')
- enddo
- write(IMAIN, *) ' '
- do iproc_xi = NPROC_XI_VAL-1, 0, -1
- write(IMAIN,'(20x)',advance='no')
- do iproc_eta = NPROC_ETA_VAL -1, 0, -1
- ichunk = CHUNK_AC_ANTIPODE
- write(IMAIN,'(i5)',advance='no') addressing(ichunk,iproc_xi,iproc_eta)
- enddo
- write(IMAIN,'(1x)',advance='yes')
- enddo
- write(IMAIN, *) ' '
- endif
- endif
-
- ! determine chunk number and local slice coordinates using addressing
- ! (needed for Stacey conditions)
- ichunk = ichunk_slice(myrank)
-
- end subroutine read_mesh_databases_addressing
-
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine read_mesh_databases_MPI()
-
- use specfem_par
- use specfem_par_crustmantle
- use specfem_par_innercore
- use specfem_par_outercore
- implicit none
-
- ! local parameters
- real :: percentage_edge
- integer :: ier
-
- ! read MPI interfaces from file
-
- ! crust mantle
- if( ADIOS_ENABLED .and. ADIOS_FOR_MPI_ARRAYS ) then
- call read_mesh_databases_MPI_CM_adios()
- else
- call read_mesh_databases_MPI_CM()
- endif
-
- allocate(buffer_send_vector_crust_mantle(NDIM,max_nibool_interfaces_cm,num_interfaces_crust_mantle), &
- buffer_recv_vector_crust_mantle(NDIM,max_nibool_interfaces_cm,num_interfaces_crust_mantle), &
- request_send_vector_cm(num_interfaces_crust_mantle), &
- request_recv_vector_cm(num_interfaces_crust_mantle), &
- stat=ier)
- if( ier /= 0 ) call exit_mpi(myrank,'error allocating array buffer_send_vector_crust_mantle etc.')
-
- if( SIMULATION_TYPE == 3 ) then
- allocate(b_buffer_send_vector_cm(NDIM,max_nibool_interfaces_cm,num_interfaces_crust_mantle), &
- b_buffer_recv_vector_cm(NDIM,max_nibool_interfaces_cm,num_interfaces_crust_mantle), &
- b_request_send_vector_cm(num_interfaces_crust_mantle), &
- b_request_recv_vector_cm(num_interfaces_crust_mantle), &
- stat=ier)
- if( ier /= 0 ) call exit_mpi(myrank,'error allocating array b_buffer_send_vector_cm etc.')
- endif
-
- ! outer core
- if( ADIOS_ENABLED .and. ADIOS_FOR_MPI_ARRAYS ) then
- call read_mesh_databases_MPI_OC_adios()
- else
- call read_mesh_databases_MPI_OC()
- endif
-
- allocate(buffer_send_scalar_outer_core(max_nibool_interfaces_oc,num_interfaces_outer_core), &
- buffer_recv_scalar_outer_core(max_nibool_interfaces_oc,num_interfaces_outer_core), &
- request_send_scalar_oc(num_interfaces_outer_core), &
- request_recv_scalar_oc(num_interfaces_outer_core), &
- stat=ier)
- if( ier /= 0 ) call exit_mpi(myrank,'error allocating array buffer_send_vector_outer_core etc.')
-
- if( SIMULATION_TYPE == 3 ) then
- allocate(b_buffer_send_scalar_outer_core(max_nibool_interfaces_oc,num_interfaces_outer_core), &
- b_buffer_recv_scalar_outer_core(max_nibool_interfaces_oc,num_interfaces_outer_core), &
- b_request_send_scalar_oc(num_interfaces_outer_core), &
- b_request_recv_scalar_oc(num_interfaces_outer_core), &
- stat=ier)
- if( ier /= 0 ) call exit_mpi(myrank,'error allocating array b_buffer_send_vector_outer_core etc.')
- endif
-
- ! inner core
- if( ADIOS_ENABLED .and. ADIOS_FOR_MPI_ARRAYS ) then
- call read_mesh_databases_MPI_IC_adios()
- else
- call read_mesh_databases_MPI_IC()
- endif
-
- allocate(buffer_send_vector_inner_core(NDIM,max_nibool_interfaces_ic,num_interfaces_inner_core), &
- buffer_recv_vector_inner_core(NDIM,max_nibool_interfaces_ic,num_interfaces_inner_core), &
- request_send_vector_ic(num_interfaces_inner_core), &
- request_recv_vector_ic(num_interfaces_inner_core), &
- stat=ier)
- if( ier /= 0 ) call exit_mpi(myrank,'error allocating array buffer_send_vector_inner_core etc.')
-
- if( SIMULATION_TYPE == 3 ) then
- allocate(b_buffer_send_vector_inner_core(NDIM,max_nibool_interfaces_ic,num_interfaces_inner_core), &
- b_buffer_recv_vector_inner_core(NDIM,max_nibool_interfaces_ic,num_interfaces_inner_core), &
- b_request_send_vector_ic(num_interfaces_inner_core), &
- b_request_recv_vector_ic(num_interfaces_inner_core), &
- stat=ier)
- if( ier /= 0 ) call exit_mpi(myrank,'error allocating array b_buffer_send_vector_inner_core etc.')
- endif
-
-
- ! user output
- if(myrank == 0) then
- write(IMAIN,*) 'for overlapping of communications with calculations:'
- write(IMAIN,*)
-
- percentage_edge = 100. * nspec_outer_crust_mantle / real(NSPEC_CRUST_MANTLE)
- write(IMAIN,*) 'percentage of edge elements in crust/mantle ',percentage_edge,'%'
- write(IMAIN,*) 'percentage of volume elements in crust/mantle ',100. - percentage_edge,'%'
- write(IMAIN,*)
-
- percentage_edge = 100.* nspec_outer_outer_core / real(NSPEC_OUTER_CORE)
- write(IMAIN,*) 'percentage of edge elements in outer core ',percentage_edge,'%'
- write(IMAIN,*) 'percentage of volume elements in outer core ',100. - percentage_edge,'%'
- write(IMAIN,*)
-
- percentage_edge = 100. * nspec_outer_inner_core / real(NSPEC_INNER_CORE)
- write(IMAIN,*) 'percentage of edge elements in inner core ',percentage_edge,'%'
- write(IMAIN,*) 'percentage of volume elements in inner core ',100. - percentage_edge,'%'
- write(IMAIN,*)
- call flush_IMAIN()
- endif
- ! synchronizes MPI processes
- call sync_all()
-
- end subroutine read_mesh_databases_MPI
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine read_mesh_databases_MPI_CM()
-
- use specfem_par
- use specfem_par_crustmantle
- implicit none
-
- ! local parameters
- integer :: ier
-
- ! crust mantle region
-
- ! create the name for the database of the current slide and region
- call create_name_database(prname,myrank,IREGION_CRUST_MANTLE,LOCAL_PATH)
-
- open(unit=IIN,file=prname(1:len_trim(prname))//'solver_data_mpi.bin', &
- status='old',action='read',form='unformatted',iostat=ier)
- if( ier /= 0 ) call exit_mpi(myrank,'error opening solver_data_mpi.bin')
-
- ! MPI interfaces
- read(IIN) num_interfaces_crust_mantle
- allocate(my_neighbours_crust_mantle(num_interfaces_crust_mantle), &
- nibool_interfaces_crust_mantle(num_interfaces_crust_mantle), &
- stat=ier)
- if( ier /= 0 ) &
- call exit_mpi(myrank,'error allocating array my_neighbours_crust_mantle etc.')
-
- if( num_interfaces_crust_mantle > 0 ) then
- read(IIN) max_nibool_interfaces_cm
- allocate(ibool_interfaces_crust_mantle(max_nibool_interfaces_cm,num_interfaces_crust_mantle), &
- stat=ier)
- if( ier /= 0 ) call exit_mpi(myrank,'error allocating array ibool_interfaces_crust_mantle')
-
- read(IIN) my_neighbours_crust_mantle
- read(IIN) nibool_interfaces_crust_mantle
- read(IIN) ibool_interfaces_crust_mantle
- else
- ! dummy array
- max_nibool_interfaces_cm = 0
- allocate(ibool_interfaces_crust_mantle(0,0),stat=ier)
- if( ier /= 0 ) call exit_mpi(myrank,'error allocating array dummy ibool_interfaces_crust_mantle')
- endif
-
- ! inner / outer elements
- read(IIN) nspec_inner_crust_mantle,nspec_outer_crust_mantle
- read(IIN) num_phase_ispec_crust_mantle
- if( num_phase_ispec_crust_mantle < 0 ) &
- call exit_mpi(myrank,'error num_phase_ispec_crust_mantle is < zero')
-
- allocate(phase_ispec_inner_crust_mantle(num_phase_ispec_crust_mantle,2),&
- stat=ier)
- if( ier /= 0 ) &
- call exit_mpi(myrank,'error allocating array phase_ispec_inner_crust_mantle')
-
- if(num_phase_ispec_crust_mantle > 0 ) read(IIN) phase_ispec_inner_crust_mantle
-
- ! mesh coloring for GPUs
- if( USE_MESH_COLORING_GPU ) then
- ! colors
- read(IIN) num_colors_outer_crust_mantle,num_colors_inner_crust_mantle
-
- allocate(num_elem_colors_crust_mantle(num_colors_outer_crust_mantle + num_colors_inner_crust_mantle), &
- stat=ier)
- if( ier /= 0 ) &
- call exit_mpi(myrank,'error allocating num_elem_colors_crust_mantle array')
-
- read(IIN) num_elem_colors_crust_mantle
- else
- ! allocates dummy arrays
- num_colors_outer_crust_mantle = 0
- num_colors_inner_crust_mantle = 0
- allocate(num_elem_colors_crust_mantle(num_colors_outer_crust_mantle + num_colors_inner_crust_mantle), &
- stat=ier)
- if( ier /= 0 ) &
- call exit_mpi(myrank,'error allocating num_elem_colors_crust_mantle array')
- endif
-
- close(IIN)
-
- end subroutine read_mesh_databases_MPI_CM
-
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine read_mesh_databases_MPI_OC()
-
- use specfem_par
- use specfem_par_outercore
- implicit none
-
- ! local parameters
- integer :: ier
-
- ! outer core region
-
- ! create the name for the database of the current slice and region
- call create_name_database(prname,myrank,IREGION_OUTER_CORE,LOCAL_PATH)
-
- open(unit=IIN,file=prname(1:len_trim(prname))//'solver_data_mpi.bin', &
- status='old',action='read',form='unformatted',iostat=ier)
- if( ier /= 0 ) call exit_mpi(myrank,'error opening solver_data_mpi.bin')
-
- ! MPI interfaces
- read(IIN) num_interfaces_outer_core
- allocate(my_neighbours_outer_core(num_interfaces_outer_core), &
- nibool_interfaces_outer_core(num_interfaces_outer_core), &
- stat=ier)
- if( ier /= 0 ) &
- call exit_mpi(myrank,'error allocating array my_neighbours_outer_core etc.')
-
- if( num_interfaces_outer_core > 0 ) then
- read(IIN) max_nibool_interfaces_oc
- allocate(ibool_interfaces_outer_core(max_nibool_interfaces_oc,num_interfaces_outer_core), &
- stat=ier)
- if( ier /= 0 ) call exit_mpi(myrank,'error allocating array ibool_interfaces_outer_core')
-
- read(IIN) my_neighbours_outer_core
- read(IIN) nibool_interfaces_outer_core
- read(IIN) ibool_interfaces_outer_core
- else
- ! dummy array
- max_nibool_interfaces_oc = 0
- allocate(ibool_interfaces_outer_core(0,0),stat=ier)
- if( ier /= 0 ) call exit_mpi(myrank,'error allocating array dummy ibool_interfaces_outer_core')
- endif
-
- ! inner / outer elements
- read(IIN) nspec_inner_outer_core,nspec_outer_outer_core
- read(IIN) num_phase_ispec_outer_core
- if( num_phase_ispec_outer_core < 0 ) &
- call exit_mpi(myrank,'error num_phase_ispec_outer_core is < zero')
-
- allocate(phase_ispec_inner_outer_core(num_phase_ispec_outer_core,2),&
- stat=ier)
- if( ier /= 0 ) &
- call exit_mpi(myrank,'error allocating array phase_ispec_inner_outer_core')
-
- if(num_phase_ispec_outer_core > 0 ) read(IIN) phase_ispec_inner_outer_core
-
- ! mesh coloring for GPUs
- if( USE_MESH_COLORING_GPU ) then
- ! colors
- read(IIN) num_colors_outer_outer_core,num_colors_inner_outer_core
-
- allocate(num_elem_colors_outer_core(num_colors_outer_outer_core + num_colors_inner_outer_core), &
- stat=ier)
- if( ier /= 0 ) &
- call exit_mpi(myrank,'error allocating num_elem_colors_outer_core array')
-
- read(IIN) num_elem_colors_outer_core
- else
- ! allocates dummy arrays
- num_colors_outer_outer_core = 0
- num_colors_inner_outer_core = 0
- allocate(num_elem_colors_outer_core(num_colors_outer_outer_core + num_colors_inner_outer_core), &
- stat=ier)
- if( ier /= 0 ) &
- call exit_mpi(myrank,'error allocating num_elem_colors_outer_core array')
- endif
-
- close(IIN)
-
- end subroutine read_mesh_databases_MPI_OC
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine read_mesh_databases_MPI_IC()
-
- use specfem_par
- use specfem_par_innercore
- implicit none
-
- ! local parameters
- integer :: ier
-
- ! inner core region
-
- ! create the name for the database of the current slice and region
- call create_name_database(prname,myrank,IREGION_INNER_CORE,LOCAL_PATH)
-
- open(unit=IIN,file=prname(1:len_trim(prname))//'solver_data_mpi.bin', &
- status='old',action='read',form='unformatted',iostat=ier)
- if( ier /= 0 ) call exit_mpi(myrank,'error opening solver_data_mpi.bin')
-
- ! MPI interfaces
- read(IIN) num_interfaces_inner_core
- allocate(my_neighbours_inner_core(num_interfaces_inner_core), &
- nibool_interfaces_inner_core(num_interfaces_inner_core), &
- stat=ier)
- if( ier /= 0 ) &
- call exit_mpi(myrank,'error allocating array my_neighbours_inner_core etc.')
-
- if( num_interfaces_inner_core > 0 ) then
- read(IIN) max_nibool_interfaces_ic
- allocate(ibool_interfaces_inner_core(max_nibool_interfaces_ic,num_interfaces_inner_core), &
- stat=ier)
- if( ier /= 0 ) call exit_mpi(myrank,'error allocating array ibool_interfaces_inner_core')
-
- read(IIN) my_neighbours_inner_core
- read(IIN) nibool_interfaces_inner_core
- read(IIN) ibool_interfaces_inner_core
- else
- ! dummy array
- max_nibool_interfaces_ic = 0
- allocate(ibool_interfaces_inner_core(0,0),stat=ier)
- if( ier /= 0 ) call exit_mpi(myrank,'error allocating array dummy ibool_interfaces_inner_core')
- endif
-
- ! inner / outer elements
- read(IIN) nspec_inner_inner_core,nspec_outer_inner_core
- read(IIN) num_phase_ispec_inner_core
- if( num_phase_ispec_inner_core < 0 ) &
- call exit_mpi(myrank,'error num_phase_ispec_inner_core is < zero')
-
- allocate(phase_ispec_inner_inner_core(num_phase_ispec_inner_core,2),&
- stat=ier)
- if( ier /= 0 ) &
- call exit_mpi(myrank,'error allocating array phase_ispec_inner_inner_core')
-
- if(num_phase_ispec_inner_core > 0 ) read(IIN) phase_ispec_inner_inner_core
-
- ! mesh coloring for GPUs
- if( USE_MESH_COLORING_GPU ) then
- ! colors
- read(IIN) num_colors_outer_inner_core,num_colors_inner_inner_core
-
- allocate(num_elem_colors_inner_core(num_colors_outer_inner_core + num_colors_inner_inner_core), &
- stat=ier)
- if( ier /= 0 ) &
- call exit_mpi(myrank,'error allocating num_elem_colors_inner_core array')
-
- read(IIN) num_elem_colors_inner_core
- else
- ! allocates dummy arrays
- num_colors_outer_inner_core = 0
- num_colors_inner_inner_core = 0
- allocate(num_elem_colors_inner_core(num_colors_outer_inner_core + num_colors_inner_inner_core), &
- stat=ier)
- if( ier /= 0 ) &
- call exit_mpi(myrank,'error allocating num_elem_colors_inner_core array')
- endif
-
- close(IIN)
-
- end subroutine read_mesh_databases_MPI_IC
-
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
- subroutine read_mesh_databases_stacey()
-
- use specfem_par
- use specfem_par_crustmantle
- use specfem_par_innercore
- use specfem_par_outercore
-
- implicit none
-
- ! local parameters
- integer :: ier
-
- ! crust and mantle
-
- ! create name of database
- call create_name_database(prname,myrank,IREGION_CRUST_MANTLE,LOCAL_PATH)
-
- ! read arrays for Stacey conditions
- open(unit=27,file=prname(1:len_trim(prname))//'stacey.bin', &
- status='old',form='unformatted',action='read',iostat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error opening stacey.bin file for crust mantle')
-
- read(27) nimin_crust_mantle
- read(27) nimax_crust_mantle
- read(27) njmin_crust_mantle
- read(27) njmax_crust_mantle
- read(27) nkmin_xi_crust_mantle
- read(27) nkmin_eta_crust_mantle
- close(27)
-
- ! outer core
-
- ! create name of database
- call create_name_database(prname,myrank,IREGION_OUTER_CORE,LOCAL_PATH)
-
- ! read arrays for Stacey conditions
- open(unit=27,file=prname(1:len_trim(prname))//'stacey.bin', &
- status='old',form='unformatted',action='read',iostat=ier)
- if( ier /= 0 ) call exit_MPI(myrank,'error opening stacey.bin file for outer core')
-
- read(27) nimin_outer_core
- read(27) nimax_outer_core
- read(27) njmin_outer_core
- read(27) njmax_outer_core
- read(27) nkmin_xi_outer_core
- read(27) nkmin_eta_outer_core
- close(27)
-
- end subroutine read_mesh_databases_stacey
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/rules.mk
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/rules.mk 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/rules.mk 2013-08-30 12:45:44 UTC (rev 22746)
@@ -236,7 +236,7 @@
${E}/xspecfem3D: $(XSPECFEM_OBJECTS)
@echo "building xspecfem3D with CUDA 5 support"
${NVCCLINK} -o $(cuda_DEVICE_OBJ) $(cuda_OBJECTS)
- ${FCLINK} -o ${E}/xspecfem3D $(XSPECFEM_OBJECTS) $(cuda_DEVICE_OBJ) $(MPILIBS) $(CUDA_LINK)
+ ${FCLINK} -o ${E}/xspecfem3D $(XSPECFEM_OBJECTS) $(cuda_DEVICE_OBJ) $(MPILIBS) $(CUDA_LINK)
else
## cuda 4 version
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/save_regular_kernels.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/save_regular_kernels.f90 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/save_regular_kernels.f90 2013-08-30 12:45:44 UTC (rev 22746)
@@ -25,54 +25,13 @@
!
!=====================================================================
- subroutine save_regular_kernels_crust_mantle(myrank, &
- npoints_slice, hxir_reg, hetar_reg, hgammar_reg, ispec_reg, &
- scale_t,scale_displ, &
- cijkl_kl_crust_mantle,rho_kl_crust_mantle, &
- alpha_kl_crust_mantle,beta_kl_crust_mantle, &
- ystore_crust_mantle,zstore_crust_mantle, &
- rhostore_crust_mantle,muvstore_crust_mantle, &
- kappavstore_crust_mantle,ibool_crust_mantle, &
- kappahstore_crust_mantle,muhstore_crust_mantle, &
- eta_anisostore_crust_mantle,ispec_is_tiso_crust_mantle, &
- LOCAL_PATH)
+ subroutine save_regular_kernels_crust_mantle()
- use constants_solver
- use specfem_par,only: ANISOTROPIC_KL,SAVE_TRANSVERSE_KL_ONLY
+ use specfem_par
+ use specfem_par_crustmantle
implicit none
- integer myrank
-
- integer, intent(in) :: npoints_slice
- real, dimension(NGLLX, NM_KL_REG_PTS_VAL), intent(in) :: hxir_reg
- real, dimension(NGLLY, NM_KL_REG_PTS_VAL), intent(in) :: hetar_reg
- real, dimension(NGLLZ, NM_KL_REG_PTS_VAL), intent(in) :: hgammar_reg
- integer, dimension(NM_KL_REG_PTS_VAL), intent(in) :: ispec_reg
-
- double precision :: scale_t,scale_displ
-
- real(kind=CUSTOM_REAL), dimension(21,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT_ANISO_KL) :: &
- cijkl_kl_crust_mantle
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
- rho_kl_crust_mantle, beta_kl_crust_mantle, alpha_kl_crust_mantle
-
- real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE) :: &
- ystore_crust_mantle,zstore_crust_mantle
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ISO_MANTLE) :: &
- rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_TISO_MANTLE) :: &
- kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle
-
- logical, dimension(NSPEC_CRUST_MANTLE) :: ispec_is_tiso_crust_mantle
-
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: ibool_crust_mantle
-
- character(len=150) LOCAL_PATH
-
! local parameters
real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: &
cijkl_kl_crust_mantle_reg
@@ -85,7 +44,7 @@
real(kind=CUSTOM_REAL) :: rhol,mul,kappal,rho_kl,alpha_kl,beta_kl
real(kind=CUSTOM_REAL) :: alphah_kl,alphav_kl,betah_kl,betav_kl,rhonotprime_kl
integer :: ispec,i,j,k,iglob
- character(len=150) prname
+! character(len=150) prname
double precision :: hlagrange
integer :: ipoint
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/setup_GLL_points.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/setup_GLL_points.f90 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/setup_GLL_points.f90 2013-08-30 12:45:44 UTC (rev 22746)
@@ -28,10 +28,11 @@
subroutine setup_GLL_points()
use specfem_par
+
implicit none
! local parameters
- integer :: i,j
+ integer :: i,j,k
! set up GLL points, weights and derivation matrices
call define_derivation_matrices(xigll,yigll,zigll,wxgll,wygll,wzgll, &
@@ -39,9 +40,21 @@
hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube)
+ ! define a 3D extension in order to be able to force vectorization in the compute_forces routines
+ if( FORCE_VECTORIZATION_VAL ) then
+ do k = 1,NGLLZ
+ do j = 1,NGLLY
+ do i = 1,NGLLX
+ wgllwgll_yz_3D(i,j,k) = wgllwgll_yz(j,k)
+ wgllwgll_xz_3D(i,j,k) = wgllwgll_xz(i,k)
+ wgllwgll_xy_3D(i,j,k) = wgllwgll_xy(i,j)
+ enddo
+ enddo
+ enddo
+ endif
+
if( USE_DEVILLE_PRODUCTS_VAL ) then
-
- ! check that optimized routines from Deville et al. (2002) can be used
+ ! check that optimized routines from Deville et al. (2002) can be used
if(NGLLX /= 5 .or. NGLLY /= 5 .or. NGLLZ /= 5) &
stop 'Deville et al. (2002) routines can only be used if NGLLX = NGLLY = NGLLZ = 5'
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90 2013-08-30 12:45:44 UTC (rev 22746)
@@ -72,6 +72,8 @@
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
+ ! arrays for force_vectorization
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: wgllwgll_xy_3D,wgllwgll_xz_3D,wgllwgll_yz_3D
!-----------------------------------------------------------------
! attenuation parameters
@@ -278,6 +280,21 @@
! ADJOINT
real(kind=CUSTOM_REAL) :: b_deltat,b_deltatover2,b_deltatsqover2
+ ! this is for LDDRK
+ real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: displ_crust_mantle_lddrk,veloc_crust_mantle_lddrk
+ real(kind=CUSTOM_REAL), dimension(:), allocatable :: displ_outer_core_lddrk,veloc_outer_core_lddrk
+ real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: displ_inner_core_lddrk,veloc_inner_core_lddrk
+
+ real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: A_array_rotation_lddrk,B_array_rotation_lddrk
+ real(kind=CUSTOM_REAL),dimension(:,:,:,:,:,:), allocatable :: R_memory_crust_mantle_lddrk
+ real(kind=CUSTOM_REAL),dimension(:,:,:,:,:,:), allocatable :: R_memory_inner_core_lddrk
+
+ integer :: NSTAGE_TIME_SCHEME,istage
+
+#ifdef USE_SERIAL_CASCADE_FOR_IOs
+ logical :: you_can_start_doing_IOs
+#endif
+
end module specfem_par
@@ -411,7 +428,7 @@
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STACEY) :: &
rho_vp_crust_mantle,rho_vs_crust_mantle
integer :: nspec2D_xmin_crust_mantle,nspec2D_xmax_crust_mantle, &
- nspec2D_ymin_crust_mantle,nspec2D_ymax_crust_mantle
+ nspec2D_ymin_crust_mantle,nspec2D_ymax_crust_mantle
integer, dimension(2,NSPEC2DMAX_YMIN_YMAX_CM) :: nimin_crust_mantle,nimax_crust_mantle,nkmin_eta_crust_mantle
integer, dimension(2,NSPEC2DMAX_XMIN_XMAX_CM) :: njmin_crust_mantle,njmax_crust_mantle,nkmin_xi_crust_mantle
@@ -443,6 +460,14 @@
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_670) :: d670_kl, d670_kl_top, d670_kl_bot
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_CMB) :: cmb_kl, cmb_kl_top, cmb_kl_bot
+ ! For saving kernels on a regular grid
+ integer :: npoints_slice
+ integer, dimension(NM_KL_REG_PTS_VAL) :: points_slice
+ integer, dimension(NM_KL_REG_PTS_VAL) :: ispec_reg
+ real, dimension(NGLLX, NM_KL_REG_PTS_VAL) :: hxir_reg
+ real, dimension(NGLLY, NM_KL_REG_PTS_VAL) :: hetar_reg
+ real, dimension(NGLLZ, NM_KL_REG_PTS_VAL) :: hgammar_reg
+
! NOISE_TOMOGRAPHY
real(kind=CUSTOM_REAL), dimension(:,:,:,:,:), allocatable :: noise_sourcearray
real(kind=CUSTOM_REAL), dimension(:), allocatable :: &
@@ -614,8 +639,8 @@
! Stacey
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_STACEY) :: vp_outer_core
integer :: nspec2D_xmin_outer_core,nspec2D_xmax_outer_core, &
- nspec2D_ymin_outer_core,nspec2D_ymax_outer_core, &
- nspec2D_zmin_outer_core
+ nspec2D_ymin_outer_core,nspec2D_ymax_outer_core, &
+ nspec2D_zmin_outer_core
integer, dimension(2,NSPEC2DMAX_YMIN_YMAX_OC) :: nimin_outer_core,nimax_outer_core,nkmin_eta_outer_core
integer, dimension(2,NSPEC2DMAX_XMIN_XMAX_OC) :: njmin_outer_core,njmax_outer_core,nkmin_xi_outer_core
@@ -624,7 +649,7 @@
absorb_zmin_outer_core
integer :: reclen_xmin_outer_core, reclen_xmax_outer_core, &
- reclen_ymin_outer_core, reclen_ymax_outer_core
+ reclen_ymin_outer_core, reclen_ymax_outer_core
integer :: reclen_zmin
real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_OUTER_CORE_ADJOINT) :: &
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/update_displacement_Newmark.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/update_displacement_Newmark.f90 2013-08-30 00:30:39 UTC (rev 22745)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/update_displacement_Newmark.f90 2013-08-30 12:45:44 UTC (rev 22746)
@@ -66,7 +66,7 @@
! updates wavefields
if( .not. GPU_MODE) then
- ! on CPU
+ ! on CPU
! Newmark time scheme update
! mantle
@@ -78,54 +78,76 @@
! inner core
call update_displ_elastic(NGLOB_INNER_CORE,displ_inner_core,veloc_inner_core,accel_inner_core, &
deltat,deltatover2,deltatsqover2)
+ else
+ ! on GPU
+ ! Includes FORWARD_OR_ADJOINT == 1
+ ! outer core region
+ call it_update_displacement_oc_cuda(Mesh_pointer,deltat,deltatsqover2,deltatover2,1)
+ ! inner core region
+ call it_update_displacement_ic_cuda(Mesh_pointer,deltat,deltatsqover2,deltatover2,1)
+ ! crust/mantle region
+ call it_update_displacement_cm_cuda(Mesh_pointer,deltat,deltatsqover2,deltatover2,1)
+ endif
- ! backward field
- if (SIMULATION_TYPE == 3) then
- ! mantle
- call update_displ_elastic(NGLOB_CRUST_MANTLE,b_displ_crust_mantle,b_veloc_crust_mantle,b_accel_crust_mantle, &
- b_deltat,b_deltatover2,b_deltatsqover2)
- ! outer core
- call update_displ_acoustic(NGLOB_OUTER_CORE,b_displ_outer_core,b_veloc_outer_core,b_accel_outer_core, &
- b_deltat,b_deltatover2,b_deltatsqover2)
- ! inner core
- call update_displ_elastic(NGLOB_INNER_CORE,b_displ_inner_core,b_veloc_inner_core,b_accel_inner_core, &
- b_deltat,b_deltatover2,b_deltatsqover2)
- endif
+ end subroutine update_displacement_Newmark
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+ subroutine update_displacement_Newmark_backward()
+
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
+
+ implicit none
+
+ ! checks
+ if( SIMULATION_TYPE /= 3 ) return
+
+ ! updates wavefields
+ if( .not. GPU_MODE) then
+ ! on CPU
+ ! Newmark time scheme update for backward/reconstructed fields
+ ! mantle
+ call update_displ_elastic(NGLOB_CRUST_MANTLE_ADJOINT,b_displ_crust_mantle,b_veloc_crust_mantle,b_accel_crust_mantle, &
+ b_deltat,b_deltatover2,b_deltatsqover2)
+ ! outer core
+ call update_displ_acoustic(NGLOB_OUTER_CORE_ADJOINT,b_displ_outer_core,b_veloc_outer_core,b_accel_outer_core, &
+ b_deltat,b_deltatover2,b_deltatsqover2)
+ ! inner core
+ call update_displ_elastic(NGLOB_INNER_CORE_ADJOINT,b_displ_inner_core,b_veloc_inner_core,b_accel_inner_core, &
+ b_deltat,b_deltatover2,b_deltatsqover2)
else
! on GPU
- ! Includes SIM_TYPE 1 & 3
-
+ ! Includes FORWARD_OR_ADJOINT == 3
! outer core region
- call it_update_displacement_oc_cuda(Mesh_pointer, &
- deltat, deltatsqover2, deltatover2, &
- b_deltat, b_deltatsqover2, b_deltatover2)
+ call it_update_displacement_oc_cuda(Mesh_pointer,b_deltat,b_deltatsqover2,b_deltatover2,3)
! inner core region
- call it_update_displacement_ic_cuda(Mesh_pointer, &
- deltat, deltatsqover2, deltatover2, &
- b_deltat, b_deltatsqover2, b_deltatover2)
-
+ call it_update_displacement_ic_cuda(Mesh_pointer,b_deltat,b_deltatsqover2,b_deltatover2,3)
! crust/mantle region
- call it_update_displacement_cm_cuda(Mesh_pointer, &
- deltat, deltatsqover2, deltatover2, &
- b_deltat, b_deltatsqover2, b_deltatover2)
+ call it_update_displacement_cm_cuda(Mesh_pointer,b_deltat,b_deltatsqover2,b_deltatover2,3)
endif
- end subroutine update_displacement_Newmark
+ end subroutine update_displacement_Newmark_backward
!
!-------------------------------------------------------------------------------------------------
!
- subroutine update_displ_elastic(nglob,displ,veloc,accel, &
+
+ subroutine update_displ_elastic(NGLOB,displ,veloc,accel, &
deltat,deltatover2,deltatsqover2)
use constants_solver,only: CUSTOM_REAL,NDIM,FORCE_VECTORIZATION_VAL
implicit none
- integer,intent(in) :: nglob
- real(kind=CUSTOM_REAL),dimension(NDIM,nglob),intent(inout) :: displ,veloc,accel
+ integer,intent(in) :: NGLOB
+ real(kind=CUSTOM_REAL),dimension(NDIM,NGLOB),intent(inout) :: displ,veloc,accel
real(kind=CUSTOM_REAL),intent(in) :: deltat,deltatover2,deltatsqover2
! local parameters
@@ -133,13 +155,13 @@
! Newmark time scheme update
if(FORCE_VECTORIZATION_VAL) then
- do i=1,nglob * NDIM
+ do i=1,NGLOB * NDIM
displ(i,1) = displ(i,1) + deltat * veloc(i,1) + deltatsqover2 * accel(i,1)
veloc(i,1) = veloc(i,1) + deltatover2 * accel(i,1)
accel(i,1) = 0._CUSTOM_REAL
enddo
else
- do i=1,nglob
+ do i=1,NGLOB
displ(:,i) = displ(:,i) + deltat * veloc(:,i) + deltatsqover2 * accel(:,i)
veloc(:,i) = veloc(:,i) + deltatover2 * accel(:,i)
accel(:,i) = 0._CUSTOM_REAL
@@ -153,22 +175,22 @@
!-------------------------------------------------------------------------------------------------
!
- subroutine update_displ_acoustic(nglob,displ,veloc,accel, &
+ subroutine update_displ_acoustic(NGLOB,displ,veloc,accel, &
deltat,deltatover2,deltatsqover2)
use constants,only: CUSTOM_REAL
implicit none
- integer,intent(in) :: nglob
- real(kind=CUSTOM_REAL),dimension(nglob),intent(inout) :: displ,veloc,accel
+ integer,intent(in) :: NGLOB
+ real(kind=CUSTOM_REAL),dimension(NGLOB),intent(inout) :: displ,veloc,accel
real(kind=CUSTOM_REAL),intent(in) :: deltat,deltatover2,deltatsqover2
! local parameters
integer :: i
! Newmark time scheme update
- do i=1,nglob
+ do i=1,NGLOB
displ(i) = displ(i) + deltat * veloc(i) + deltatsqover2 * accel(i)
veloc(i) = veloc(i) + deltatover2 * accel(i)
accel(i) = 0._CUSTOM_REAL
@@ -218,17 +240,18 @@
!
subroutine update_accel_elastic(NGLOB,NGLOB_XY,veloc_crust_mantle,accel_crust_mantle, &
- two_omega_earth, &
- rmassx_crust_mantle,rmassy_crust_mantle,rmassz_crust_mantle, &
- NCHUNKS_VAL,ABSORBING_CONDITIONS)
+ two_omega_earth, &
+ rmassx_crust_mantle,rmassy_crust_mantle,rmassz_crust_mantle)
! updates acceleration in crust/mantle region
- use constants_solver,only: CUSTOM_REAL,NDIM
+ use constants_solver,only: CUSTOM_REAL,NDIM,NCHUNKS_VAL
+ use specfem_par,only: ABSORBING_CONDITIONS
+
implicit none
- integer :: NGLOB,NGLOB_XY,NCHUNKS_VAL
+ integer :: NGLOB,NGLOB_XY
! velocity & acceleration
! crust/mantle region
@@ -248,8 +271,6 @@
real(kind=CUSTOM_REAL) :: two_omega_earth
- logical :: ABSORBING_CONDITIONS
-
! local parameters
integer :: i
@@ -285,12 +306,12 @@
!
subroutine update_veloc_elastic(NGLOB_CM,veloc_crust_mantle,accel_crust_mantle, &
- NGLOB_IC,veloc_inner_core,accel_inner_core, &
- deltatover2,two_omega_earth,rmass_inner_core)
+ NGLOB_IC,veloc_inner_core,accel_inner_core, &
+ deltatover2,two_omega_earth,rmass_inner_core)
! updates velocity in crust/mantle region, and acceleration and velocity in inner core
- use constants_solver,only: CUSTOM_REAL,NDIM
+ use constants_solver,only: CUSTOM_REAL,NDIM,FORCE_VECTORIZATION_VAL
implicit none
@@ -320,19 +341,38 @@
! needs both, acceleration update & velocity corrector terms
! mantle
- do i=1,NGLOB_CM
- veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) + deltatover2*accel_crust_mantle(:,i)
- enddo
+ if(FORCE_VECTORIZATION_VAL) then
+ do i=1,NGLOB_CM * NDIM
+ veloc_crust_mantle(i,i) = veloc_crust_mantle(i,i) + deltatover2*accel_crust_mantle(i,i)
+ enddo
+ else
+ do i=1,NGLOB_CM
+ veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) + deltatover2*accel_crust_mantle(:,i)
+ enddo
+ endif
! inner core
- do i=1,NGLOB_IC
- accel_inner_core(1,i) = accel_inner_core(1,i)*rmass_inner_core(i) &
- + two_omega_earth*veloc_inner_core(2,i)
- accel_inner_core(2,i) = accel_inner_core(2,i)*rmass_inner_core(i) &
- - two_omega_earth*veloc_inner_core(1,i)
- accel_inner_core(3,i) = accel_inner_core(3,i)*rmass_inner_core(i)
+ if(FORCE_VECTORIZATION_VAL) then
+ do i=1,NGLOB_IC
+ accel_inner_core(1,i) = accel_inner_core(1,i)*rmass_inner_core(i) &
+ + two_omega_earth*veloc_inner_core(2,i)
+ accel_inner_core(2,i) = accel_inner_core(2,i)*rmass_inner_core(i) &
+ - two_omega_earth*veloc_inner_core(1,i)
+ accel_inner_core(3,i) = accel_inner_core(3,i)*rmass_inner_core(i)
+ enddo
+ do i=1,NGLOB_IC * NDIM
+ veloc_inner_core(i,1) = veloc_inner_core(i,1) + deltatover2*accel_inner_core(i,1)
+ enddo
+ else
+ do i=1,NGLOB_IC
+ accel_inner_core(1,i) = accel_inner_core(1,i)*rmass_inner_core(i) &
+ + two_omega_earth*veloc_inner_core(2,i)
+ accel_inner_core(2,i) = accel_inner_core(2,i)*rmass_inner_core(i) &
+ - two_omega_earth*veloc_inner_core(1,i)
+ accel_inner_core(3,i) = accel_inner_core(3,i)*rmass_inner_core(i)
- veloc_inner_core(:,i) = veloc_inner_core(:,i) + deltatover2*accel_inner_core(:,i)
- enddo
+ veloc_inner_core(:,i) = veloc_inner_core(:,i) + deltatover2*accel_inner_core(:,i)
+ enddo
+ endif
end subroutine update_veloc_elastic
More information about the CIG-COMMITS
mailing list