[cig-commits] r22977 - in seismo/3D/SPECFEM3D_GLOBE/trunk: . src/cuda src/meshfem3D src/shared src/specfem3D utils/attenuation
danielpeter at geodynamics.org
danielpeter at geodynamics.org
Mon Nov 4 00:16:43 PST 2013
Author: danielpeter
Date: 2013-11-04 00:16:42 -0800 (Mon, 04 Nov 2013)
New Revision: 22977
Modified:
seismo/3D/SPECFEM3D_GLOBE/trunk/Makefile.in
seismo/3D/SPECFEM3D_GLOBE/trunk/src/cuda/compute_forces_crust_mantle_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/trunk/src/cuda/compute_forces_inner_core_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/trunk/src/meshfem3D/create_central_cube.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/meshfem3D/create_regions_mesh.F90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/meshfem3D/get_model.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/meshfem3D/get_perm_color.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/meshfem3D/setup_color_perm.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/shared/save_header_file.F90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_boundary_kernel.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_element.F90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_element_att_memory.F90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_element_strain.F90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_noDev.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_noDev.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core_Dev.F90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_kernels.F90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_seismograms.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/get_attenuation.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/prepare_timerun.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/read_attenuation_adios.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/specfem3D.F90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/specfem3D_par.F90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/update_displacement_LDDRK.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/update_displacement_Newmark.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/utils/attenuation/attenuation_output.f90
Log:
switches indexing of arrays epsilondev_loc, rho_s_H, sum_terms, R_xx, etc., and factor_common for better vectorization; removes equivalence statements and uses Deville subroutine calls instead; updates gradient calculations for acoustic kernels in the outer core
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/Makefile.in
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/Makefile.in 2013-11-01 21:27:52 UTC (rev 22976)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/Makefile.in 2013-11-04 08:16:42 UTC (rev 22977)
@@ -201,11 +201,11 @@
ifdef CLEAN
clean:
@echo "cleaning by CLEAN defined"
- -rm -f $(foreach dir, $(CLEAN), $($(dir)_OBJECTS) $($(dir)_MODULES) $($(dir)_SHARED_OBJECTS) $($(dir)_TARGETS))
+ -rm -f $(foreach dir, $(CLEAN), $($(dir)_OBJECTS) $($(dir)_MODULES) $($(dir)_SHARED_OBJECTS) $($(dir)_TARGETS)) $O/*
else
clean:
@echo "cleaning by CLEAN not defined"
- -rm -f $(foreach dir, $(SUBDIRS), $($(dir)_OBJECTS) $($(dir)_MODULES) $($(dir)_TARGETS))
+ -rm -f $(foreach dir, $(SUBDIRS), $($(dir)_OBJECTS) $($(dir)_MODULES) $($(dir)_TARGETS)) $O/*
endif
help:
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/cuda/compute_forces_crust_mantle_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/cuda/compute_forces_crust_mantle_cuda.cu 2013-11-01 21:27:52 UTC (rev 22976)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/cuda/compute_forces_crust_mantle_cuda.cu 2013-11-04 08:16:42 UTC (rev 22977)
@@ -94,20 +94,27 @@
realw* sigma_yz) {
realw R_xx_val,R_yy_val;
-
+ int offset_sls;
+
for(int i_sls = 0; i_sls < N_SLS; i_sls++){
// index
- // note: index for R_xx,.. here is (i_sls,i,j,k,ispec) and not (i,j,k,ispec,i_sls) as in local version
- // local version: offset_sls = tx + NGLL3*(working_element + NSPEC*i_sls);
- R_xx_val = R_xx[i_sls + N_SLS*(tx + NGLL3*working_element)];
- R_yy_val = R_yy[i_sls + N_SLS*(tx + NGLL3*working_element)];
+ // note: index for R_xx,.. here is (i,j,k,i_sls,ispec) and not (i,j,k,ispec,i_sls) as in local version
+ // see local version: offset_sls = tx + NGLL3*(working_element + NSPEC*i_sls);
+ // indexing examples:
+ // (i,j,k,ispec,i_sls) -> offset_sls = tx + NGLL3*(working_element + NSPEC*i_sls)
+ // (i_sls,i,j,k,ispec) -> offset_sls = i_sls + N_SLS*(tx + NGLL3*working_element)
+ // (i,j,k,i_sls,ispec) -> offset_sls = tx + NGLL3*(i_sls + N_SLS*working_element)
+ offset_sls = tx + NGLL3*(i_sls + N_SLS*working_element);
+
+ R_xx_val = R_xx[offset_sls];
+ R_yy_val = R_yy[offset_sls];
*sigma_xx = *sigma_xx - R_xx_val;
*sigma_yy = *sigma_yy - R_yy_val;
*sigma_zz = *sigma_zz + R_xx_val + R_yy_val;
- *sigma_xy = *sigma_xy - R_xy[i_sls + N_SLS*(tx + NGLL3*working_element)];
- *sigma_xz = *sigma_xz - R_xz[i_sls + N_SLS*(tx + NGLL3*working_element)];
- *sigma_yz = *sigma_yz - R_yz[i_sls + N_SLS*(tx + NGLL3*working_element)];
+ *sigma_xy = *sigma_xy - R_xy[offset_sls];
+ *sigma_xz = *sigma_xz - R_xz[offset_sls];
+ *sigma_yz = *sigma_yz - R_yz[offset_sls];
}
}
@@ -131,7 +138,8 @@
realw fac;
realw alphaval_loc,betaval_loc,gammaval_loc;
realw factor_loc,Sn,Snp1;
-
+ int offset_sls;
+
// shear moduli for common factor (only Q_mu attenuation)
if( ANISOTROPY ){
fac = d_c44store[tx + NGLL3_PADDED * working_element];
@@ -142,16 +150,19 @@
// use Runge-Kutta scheme to march in time
for(int i_sls = 0; i_sls < N_SLS; i_sls++){
// indices
- // note: index for R_xx,... here is (i_sls,i,j,k,ispec) and not (i,j,k,ispec,i_sls) as in local version
- // local version: offset_sls = tx + NGLL3*(working_element + NSPEC*i_sls);
+ // note: index for R_xx,... here is (i,j,k,i_sls,ispec) and not (i,j,k,ispec,i_sls) as in local version
//
- // either mustore(i,j,k,ispec) * factor_common(i_sls,i,j,k,ispec)
+ // index:
+ // (i,j,k,i_sls,ispec) -> offset_sls = tx + NGLL3*(i_sls + N_SLS*working_element)
+ offset_sls = tx + NGLL3*(i_sls + N_SLS*working_element);
+
+ // either mustore(i,j,k,ispec) * factor_common(i,j,k,i_sls,ispec)
// or factor_common(i_sls,:,:,:,ispec) * c44store(:,:,:,ispec)
if( USE_3D_ATTENUATION_ARRAYS ){
// array dimension: factor_common(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC)
- factor_loc = fac * factor_common[i_sls + N_SLS*(tx + NGLL3*working_element)];
+ factor_loc = fac * factor_common[offset_sls];
}else{
- // array dimension: factor_common(N_SLS,1,1,1,NSPEC)
+ // array dimension: factor_common(1,1,1,N_SLS,NSPEC)
factor_loc = fac * factor_common[i_sls + N_SLS*working_element];
}
@@ -159,36 +170,33 @@
betaval_loc = betaval[i_sls];
gammaval_loc = gammaval[i_sls];
+
// term in xx
Sn = factor_loc * epsilondev_xx[tx + NGLL3 * working_element]; //(i,j,k,ispec)
Snp1 = factor_loc * epsilondev_xx_loc; //(i,j,k)
- R_xx[i_sls + N_SLS*(tx + NGLL3*working_element)] =
- alphaval_loc * R_xx[i_sls + N_SLS*(tx + NGLL3*working_element)] + betaval_loc * Sn + gammaval_loc * Snp1;
+ R_xx[offset_sls] = alphaval_loc * R_xx[offset_sls] + betaval_loc * Sn + gammaval_loc * Snp1;
// term in yy
Sn = factor_loc * epsilondev_yy[tx + NGLL3 * working_element];
Snp1 = factor_loc * epsilondev_yy_loc;
- R_yy[i_sls + N_SLS*(tx + NGLL3*working_element)] =
- alphaval_loc * R_yy[i_sls + N_SLS*(tx + NGLL3*working_element)] + betaval_loc * Sn + gammaval_loc * Snp1;
+ R_yy[offset_sls] = alphaval_loc * R_yy[offset_sls] + betaval_loc * Sn + gammaval_loc * Snp1;
+
// term in zz not computed since zero trace
// term in xy
Sn = factor_loc * epsilondev_xy[tx + NGLL3 * working_element];
Snp1 = factor_loc * epsilondev_xy_loc;
- R_xy[i_sls + N_SLS*(tx + NGLL3*working_element)] =
- alphaval_loc * R_xy[i_sls + N_SLS*(tx + NGLL3*working_element)] + betaval_loc * Sn + gammaval_loc * Snp1;
+ R_xy[offset_sls] = alphaval_loc * R_xy[offset_sls] + betaval_loc * Sn + gammaval_loc * Snp1;
// term in xz
Sn = factor_loc * epsilondev_xz[tx + NGLL3 * working_element];
Snp1 = factor_loc * epsilondev_xz_loc;
- R_xz[i_sls + N_SLS*(tx + NGLL3*working_element)] =
- alphaval_loc * R_xz[i_sls + N_SLS*(tx + NGLL3*working_element)] + betaval_loc * Sn + gammaval_loc * Snp1;
+ R_xz[offset_sls] = alphaval_loc * R_xz[offset_sls] + betaval_loc * Sn + gammaval_loc * Snp1;
// term in yz
Sn = factor_loc * epsilondev_yz[tx + NGLL3 * working_element];
Snp1 = factor_loc * epsilondev_yz_loc;
- R_yz[i_sls + N_SLS*(tx + NGLL3*working_element)] =
- alphaval_loc * R_yz[i_sls + N_SLS*(tx + NGLL3*working_element)] + betaval_loc * Sn + gammaval_loc * Snp1;
+ R_yz[offset_sls] = alphaval_loc * R_yz[offset_sls] + betaval_loc * Sn + gammaval_loc * Snp1;
}
}
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/cuda/compute_forces_inner_core_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/cuda/compute_forces_inner_core_cuda.cu 2013-11-01 21:27:52 UTC (rev 22976)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/cuda/compute_forces_inner_core_cuda.cu 2013-11-04 08:16:42 UTC (rev 22977)
@@ -68,24 +68,28 @@
realw* sigma_xz,
realw* sigma_yz) {
- int offset;
realw R_xx_val,R_yy_val;
+ int offset_sls;
for(int i_sls = 0; i_sls < N_SLS; i_sls++){
// index
// note: index for R_xx,.. here is (i_sls,i,j,k,ispec) and not (i,j,k,ispec,i_sls) as in local version
// local version: offset_sls = tx + NGLL3*(working_element + NSPEC*i_sls);
- offset = i_sls + N_SLS*(tx + NGLL3*working_element);
+ // indexing examples:
+ // (i,j,k,ispec,i_sls) -> offset_sls = tx + NGLL3*(working_element + NSPEC*i_sls)
+ // (i_sls,i,j,k,ispec) -> offset_sls = i_sls + N_SLS*(tx + NGLL3*working_element)
+ // (i,j,k,i_sls,ispec) -> offset_sls = tx + NGLL3*(i_sls + N_SLS*working_element)
+ offset_sls = tx + NGLL3*(i_sls + N_SLS*working_element);
- R_xx_val = R_xx[offset];
- R_yy_val = R_yy[offset];
+ R_xx_val = R_xx[offset_sls];
+ R_yy_val = R_yy[offset_sls];
*sigma_xx = *sigma_xx - R_xx_val;
*sigma_yy = *sigma_yy - R_yy_val;
*sigma_zz = *sigma_zz + R_xx_val + R_yy_val;
- *sigma_xy = *sigma_xy - R_xy[offset];
- *sigma_xz = *sigma_xz - R_xz[offset];
- *sigma_yz = *sigma_yz - R_yz[offset];
+ *sigma_xy = *sigma_xy - R_xy[offset_sls];
+ *sigma_xz = *sigma_xz - R_xz[offset_sls];
+ *sigma_yz = *sigma_yz - R_yz[offset_sls];
}
}
@@ -105,26 +109,26 @@
int USE_3D_ATTENUATION_ARRAYS
){
- int offset;
realw mul;
realw alphaval_loc,betaval_loc,gammaval_loc;
realw factor_loc,Sn,Snp1;
+ int offset_sls;
mul = d_muv[tx + NGLL3_PADDED * working_element];
// use Runge-Kutta scheme to march in time
for(int i_sls = 0; i_sls < N_SLS; i_sls++){
-
// indices
- // note: index for R_xx,... here is (i_sls,i,j,k,ispec) and not (i,j,k,ispec,i_sls) as in local version
- // local version: offset_sls = tx + NGLL3*(working_element + NSPEC*i_sls);
- // index for (i_sls,i,j,k,ispec)
- offset = i_sls + N_SLS*(tx + NGLL3*working_element);
+ // note: index for R_xx,... here is (i,j,k,i_sls,ispec) and not (i,j,k,ispec,i_sls) as in local version
+ // index for R_xx(i,j,k,i_sls,ispec),..
+ offset_sls = tx + NGLL3*(i_sls + N_SLS*working_element);
if( USE_3D_ATTENUATION_ARRAYS ){
- factor_loc = mul * factor_common[offset]; //mustore(i,j,k,ispec) * factor_common(i_sls,i,j,k,ispec)
+ //mustore(i,j,k,ispec) * factor_common(i,j,k,i_sls,ispec)
+ factor_loc = mul * factor_common[offset_sls];
}else{
- factor_loc = mul * factor_common[i_sls + N_SLS*working_element]; //mustore(i,j,k,ispec) * factor_common(i_sls,1,1,1,ispec)
+ //mustore(i,j,k,ispec) * factor_common(1,1,1,i_sls,ispec)
+ factor_loc = mul * factor_common[i_sls + N_SLS*working_element];
}
alphaval_loc = alphaval[i_sls]; // (i_sls)
betaval_loc = betaval[i_sls];
@@ -133,28 +137,28 @@
// term in xx
Sn = factor_loc * epsilondev_xx[tx + NGLL3 * working_element]; //(i,j,k,ispec)
Snp1 = factor_loc * epsilondev_xx_loc; //(i,j,k)
- R_xx[offset] = alphaval_loc * R_xx[offset] + betaval_loc * Sn + gammaval_loc * Snp1;
+ R_xx[offset_sls] = alphaval_loc * R_xx[offset_sls] + betaval_loc * Sn + gammaval_loc * Snp1;
// term in yy
Sn = factor_loc * epsilondev_yy[tx + NGLL3 * working_element];
Snp1 = factor_loc * epsilondev_yy_loc;
- R_yy[offset] = alphaval_loc * R_yy[offset] + betaval_loc * Sn + gammaval_loc * Snp1;
+ R_yy[offset_sls] = alphaval_loc * R_yy[offset_sls] + betaval_loc * Sn + gammaval_loc * Snp1;
// term in zz not computed since zero trace
// term in xy
Sn = factor_loc * epsilondev_xy[tx + NGLL3 * working_element];
Snp1 = factor_loc * epsilondev_xy_loc;
- R_xy[offset] = alphaval_loc * R_xy[offset] + betaval_loc * Sn + gammaval_loc * Snp1;
+ R_xy[offset_sls] = alphaval_loc * R_xy[offset_sls] + betaval_loc * Sn + gammaval_loc * Snp1;
// term in xz
Sn = factor_loc * epsilondev_xz[tx + NGLL3 * working_element];
Snp1 = factor_loc * epsilondev_xz_loc;
- R_xz[offset] = alphaval_loc * R_xz[offset] + betaval_loc * Sn + gammaval_loc * Snp1;
+ R_xz[offset_sls] = alphaval_loc * R_xz[offset_sls] + betaval_loc * Sn + gammaval_loc * Snp1;
// term in yz
Sn = factor_loc * epsilondev_yz[tx + NGLL3 * working_element];
Snp1 = factor_loc * epsilondev_yz_loc;
- R_yz[offset] = alphaval_loc * R_yz[offset] + betaval_loc * Sn + gammaval_loc * Snp1;
+ R_yz[offset_sls] = alphaval_loc * R_yz[offset_sls] + betaval_loc * Sn + gammaval_loc * Snp1;
}
}
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/meshfem3D/create_central_cube.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/meshfem3D/create_central_cube.f90 2013-11-01 21:27:52 UTC (rev 22976)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/meshfem3D/create_central_cube.f90 2013-11-04 08:16:42 UTC (rev 22977)
@@ -219,14 +219,13 @@
! determine where we cut the central cube to share it between CHUNK_AB & CHUNK_AB_ANTIPODE
! in the case of mod(NPROC_XI,2)/=0, the cut is asymetric and the bigger part is for CHUNK_AB
+ nz_inf_limit = nz_central_cube
if (mod(NPROC_XI,2)/=0 .and. NPROC_XI > 1) then
if (ichunk == CHUNK_AB) then
nz_inf_limit = ((nz_central_cube*2)/NPROC_XI)*floor(NPROC_XI/2.d0)
else if (ichunk == CHUNK_AB_ANTIPODE) then
nz_inf_limit = ((nz_central_cube*2)/NPROC_XI)*ceiling(NPROC_XI/2.d0)
endif
- else
- nz_inf_limit = nz_central_cube
endif
if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/meshfem3D/create_regions_mesh.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/meshfem3D/create_regions_mesh.F90 2013-11-01 21:27:52 UTC (rev 22976)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/meshfem3D/create_regions_mesh.F90 2013-11-04 08:16:42 UTC (rev 22977)
@@ -512,7 +512,7 @@
nspec_att = 1
endif
allocate(Qmu_store(ATT1,ATT2,ATT3,nspec_att), &
- tau_e_store(N_SLS,ATT1,ATT2,ATT3,nspec_att),stat=ier)
+ tau_e_store(ATT1,ATT2,ATT3,N_SLS,nspec_att),stat=ier)
if(ier /= 0) stop 'error in allocate 1'
Qmu_store(:,:,:,:) = 0.0; tau_e_store(:,:,:,:,:) = 0.0
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/meshfem3D/get_model.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/meshfem3D/get_model.f90 2013-11-01 21:27:52 UTC (rev 22976)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/meshfem3D/get_model.f90 2013-11-04 08:16:42 UTC (rev 22977)
@@ -80,7 +80,7 @@
double precision :: rho,dvp
double precision :: vpv,vph,vsv,vsh,eta_aniso
double precision :: r,r_prem,moho
- integer :: i,j,k
+ integer :: i,j,k,i_sls
! loops over all gll points for this spectral element
do k=1,NGLLZ
@@ -305,12 +305,16 @@
if(ATTENUATION) then
if(ATTENUATION_3D .or. ATTENUATION_1D_WITH_3D_STORAGE) then
- tau_e_store(:,i,j,k,ispec) = tau_e(:)
+ do i_sls = 1,N_SLS
+ tau_e_store(i,j,k,i_sls,ispec) = tau_e(i_sls)
+ enddo
Qmu_store(i,j,k,ispec) = Qmu
else
! store values from mid-point for whole element
if( i == NGLLX/2 .and. j == NGLLY/2 .and. k == NGLLZ/2 ) then
- tau_e_store(:,1,1,1,ispec) = tau_e(:)
+ do i_sls = 1,N_SLS
+ tau_e_store(1,1,1,i_sls,ispec) = tau_e(i_sls)
+ enddo
Qmu_store(1,1,1,ispec) = Qmu
endif
endif
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/meshfem3D/get_perm_color.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/meshfem3D/get_perm_color.f90 2013-11-01 21:27:52 UTC (rev 22976)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/meshfem3D/get_perm_color.f90 2013-11-04 08:16:42 UTC (rev 22977)
@@ -1154,7 +1154,7 @@
integer, intent(in) :: nspec
integer, intent(in), dimension(nspec) :: perm
- real(kind=CUSTOM_REAL), intent(inout), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,nspec) :: &
+ real(kind=CUSTOM_REAL), intent(inout), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,nspec) :: &
array_to_permute,temp_array
integer :: old_ispec,new_ispec
@@ -1184,7 +1184,7 @@
integer, intent(in) :: nspec
integer, intent(in), dimension(nspec) :: perm
- real(kind=CUSTOM_REAL), intent(inout), dimension(N_SLS,1,1,1,nspec) :: &
+ real(kind=CUSTOM_REAL), intent(inout), dimension(1,1,1,N_SLS,nspec) :: &
array_to_permute,temp_array
integer :: old_ispec,new_ispec
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/meshfem3D/setup_color_perm.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/meshfem3D/setup_color_perm.f90 2013-11-01 21:27:52 UTC (rev 22976)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/meshfem3D/setup_color_perm.f90 2013-11-04 08:16:42 UTC (rev 22977)
@@ -860,13 +860,13 @@
if (ATTENUATION) then
if (ATTENUATION_3D .or. ATTENUATION_1D_WITH_3D_STORAGE) then
allocate(temp_array_real(NGLLX,NGLLY,NGLLZ,nspec))
- allocate(temp_array_real_sls(N_SLS,NGLLX,NGLLY,NGLLZ,nspec))
+ allocate(temp_array_real_sls(NGLLX,NGLLY,NGLLZ,N_SLS,nspec))
call permute_elements_real(Qmu_store,temp_array_real,perm,nspec)
call permute_elements_real_sls(tau_e_store,temp_array_real_sls,perm,nspec)
deallocate(temp_array_real,temp_array_real_sls)
else
allocate(temp_array_real(1,1,1,nspec))
- allocate(temp_array_real_sls(N_SLS,1,1,1,nspec))
+ allocate(temp_array_real_sls(1,1,1,N_SLS,nspec))
call permute_elements_real1(Qmu_store,temp_array_real,perm,nspec)
call permute_elements_real_sls1(tau_e_store,temp_array_real_sls,perm,nspec)
deallocate(temp_array_real,temp_array_real_sls)
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/shared/save_header_file.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/shared/save_header_file.F90 2013-11-01 21:27:52 UTC (rev 22976)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/shared/save_header_file.F90 2013-11-04 08:16:42 UTC (rev 22977)
@@ -590,8 +590,7 @@
endif
write(IOUT,*)
-!! DK DK Jul 2013: we need that for the part1_*.f90 and part2_*.f90 include files, which some compilers
-!! DK DK Jul 2013: refuse to preprocess even if we rename them *.F90
+ ! we use this vectorization flag for solver routines in files **.f90
#ifdef FORCE_VECTORIZATION
write(IOUT,*) 'logical, parameter :: FORCE_VECTORIZATION_VAL = .true.'
#else
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_boundary_kernel.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_boundary_kernel.f90 2013-11-01 21:27:52 UTC (rev 22976)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_boundary_kernel.f90 2013-11-04 08:16:42 UTC (rev 22977)
@@ -33,7 +33,6 @@
use specfem_par_crustmantle
use specfem_par_innercore
use specfem_par_outercore
- use specfem_par_movie
implicit none
! local parameters
@@ -43,9 +42,12 @@
integer:: ispec,i,j,k,l,iglob
real(kind=CUSTOM_REAL) :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl
real(kind=CUSTOM_REAL) :: tempx1l,tempx2l,tempx3l
+ real(kind=CUSTOM_REAL) :: gradx,grady,gradz
+ logical,dimension(NGLOB_OUTER_CORE) :: mask_ibool
! transfers wavefields onto CPU
if( GPU_MODE ) then
+
! crust/mantle
call transfer_accel_cm_from_device(NDIM*NGLOB_CRUST_MANTLE,accel_crust_mantle,Mesh_pointer)
call transfer_displ_cm_from_device(NDIM*NGLOB_CRUST_MANTLE,displ_crust_mantle,Mesh_pointer)
@@ -59,187 +61,221 @@
call transfer_displ_oc_from_device(NGLOB_OUTER_CORE,displ_outer_core,Mesh_pointer)
call transfer_b_displ_oc_from_device(NGLOB_OUTER_CORE,b_displ_outer_core,Mesh_pointer)
- ! pre-calculates gradients on CPU
+ ! pre-calculates gradients in outer core
+ ! note: for CPU, this is already done in compute_kernels_outer_core() routine
+
+ ! pre-calculates gradients
+ mask_ibool(:) = .false.
do ispec = 1, NSPEC_OUTER_CORE
do k = 1, NGLLZ
do j = 1, NGLLY
do i = 1, NGLLX
+
+ ! global index
iglob = ibool_outer_core(i,j,k,ispec)
- xixl = xix_outer_core(i,j,k,ispec)
- xiyl = xiy_outer_core(i,j,k,ispec)
- xizl = xiz_outer_core(i,j,k,ispec)
- etaxl = etax_outer_core(i,j,k,ispec)
- etayl = etay_outer_core(i,j,k,ispec)
- etazl = etaz_outer_core(i,j,k,ispec)
- gammaxl = gammax_outer_core(i,j,k,ispec)
- gammayl = gammay_outer_core(i,j,k,ispec)
- gammazl = gammaz_outer_core(i,j,k,ispec)
+ ! only calculates gradients once for shared nodes
+ if( .not. mask_ibool(iglob) ) then
- ! calculates gradient grad(b_displ)
- tempx1l = 0._CUSTOM_REAL
- tempx2l = 0._CUSTOM_REAL
- tempx3l = 0._CUSTOM_REAL
- do l=1,NGLLX
- tempx1l = tempx1l + b_displ_outer_core(ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
- enddo
- do l=1,NGLLY
- tempx2l = tempx2l + b_displ_outer_core(ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
- enddo
- do l=1,NGLLZ
- tempx3l = tempx3l + b_displ_outer_core(ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
- enddo
- b_vector_displ_outer_core(1,iglob) = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
- b_vector_displ_outer_core(2,iglob) = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
- b_vector_displ_outer_core(3,iglob) = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
+ ! masks this global point
+ mask_ibool(iglob) = .true.
- ! calculates gradient grad(accel)
- tempx1l = 0._CUSTOM_REAL
- tempx2l = 0._CUSTOM_REAL
- tempx3l = 0._CUSTOM_REAL
- do l=1,NGLLX
- tempx1l = tempx1l + accel_outer_core(ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
- enddo
- do l=1,NGLLY
- tempx2l = tempx2l + accel_outer_core(ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
- enddo
- do l=1,NGLLZ
- tempx3l = tempx3l + accel_outer_core(ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
- enddo
- vector_accel_outer_core(1,iglob) = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
- vector_accel_outer_core(2,iglob) = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
- vector_accel_outer_core(3,iglob) = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
+ xixl = xix_outer_core(i,j,k,ispec)
+ xiyl = xiy_outer_core(i,j,k,ispec)
+ xizl = xiz_outer_core(i,j,k,ispec)
+ etaxl = etax_outer_core(i,j,k,ispec)
+ etayl = etay_outer_core(i,j,k,ispec)
+ etazl = etaz_outer_core(i,j,k,ispec)
+ gammaxl = gammax_outer_core(i,j,k,ispec)
+ gammayl = gammay_outer_core(i,j,k,ispec)
+ gammazl = gammaz_outer_core(i,j,k,ispec)
- ! calculates gradient grad(displ)
- tempx1l = 0._CUSTOM_REAL
- tempx2l = 0._CUSTOM_REAL
- tempx3l = 0._CUSTOM_REAL
- do l=1,NGLLX
- tempx1l = tempx1l + displ_outer_core(ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
- enddo
- do l=1,NGLLY
- tempx2l = tempx2l + displ_outer_core(ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
- enddo
- do l=1,NGLLZ
- tempx3l = tempx3l + displ_outer_core(ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
- enddo
- vector_displ_outer_core(1,iglob) = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
- vector_displ_outer_core(2,iglob) = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
- vector_displ_outer_core(3,iglob) = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
+ ! calculates gradient grad(b_displ)
+ tempx1l = 0._CUSTOM_REAL
+ tempx2l = 0._CUSTOM_REAL
+ tempx3l = 0._CUSTOM_REAL
+ do l=1,NGLLX
+ tempx1l = tempx1l + b_displ_outer_core(ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
+ enddo
+ do l=1,NGLLY
+ tempx2l = tempx2l + b_displ_outer_core(ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
+ enddo
+ do l=1,NGLLZ
+ tempx3l = tempx3l + b_displ_outer_core(ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
+ enddo
+ gradx = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
+ grady = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
+ gradz = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
+
+ ! assigns and stores gradient on global nodes
+ b_vector_displ_outer_core(1,iglob) = gradx
+ b_vector_displ_outer_core(2,iglob) = grady
+ b_vector_displ_outer_core(3,iglob) = gradz
+
+ ! calculates gradient grad(accel)
+ tempx1l = 0._CUSTOM_REAL
+ tempx2l = 0._CUSTOM_REAL
+ tempx3l = 0._CUSTOM_REAL
+ do l=1,NGLLX
+ tempx1l = tempx1l + accel_outer_core(ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
+ enddo
+ do l=1,NGLLY
+ tempx2l = tempx2l + accel_outer_core(ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
+ enddo
+ do l=1,NGLLZ
+ tempx3l = tempx3l + accel_outer_core(ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
+ enddo
+ gradx = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
+ grady = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
+ gradz = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
+
+ vector_accel_outer_core(1,iglob) = gradx
+ vector_accel_outer_core(2,iglob) = grady
+ vector_accel_outer_core(3,iglob) = gradz
+
+ ! calculates gradient grad(displ)
+ tempx1l = 0._CUSTOM_REAL
+ tempx2l = 0._CUSTOM_REAL
+ tempx3l = 0._CUSTOM_REAL
+ do l=1,NGLLX
+ tempx1l = tempx1l + displ_outer_core(ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
+ enddo
+ do l=1,NGLLY
+ tempx2l = tempx2l + displ_outer_core(ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
+ enddo
+ do l=1,NGLLZ
+ tempx3l = tempx3l + displ_outer_core(ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
+ enddo
+ gradx = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
+ grady = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
+ gradz = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
+
+ vector_displ_outer_core(1,iglob) = gradx
+ vector_displ_outer_core(2,iglob) = grady
+ vector_displ_outer_core(3,iglob) = gradz
+ endif
+
enddo
enddo
enddo
enddo
- endif
+ endif ! GPU_MODE
+
! updates kernels on CPU
fluid_solid_boundary = .false.
iregion_code = IREGION_CRUST_MANTLE
! Moho
if (.not. SUPPRESS_CRUSTAL_MESH .and. HONOR_1D_SPHERICAL_MOHO) then
- call compute_boundary_kernel_depth(displ_crust_mantle,accel_crust_mantle, &
- b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
- ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
- kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
- c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
- c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
- c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
- c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- k_top,ibelm_moho_top,normal_moho,moho_kl_top,fluid_solid_boundary,NSPEC2D_MOHO)
+ call compute_boundary_kernel_depth(displ_crust_mantle,accel_crust_mantle,b_displ_crust_mantle, &
+ nspec_crust_mantle,iregion_code, &
+ ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
+ xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+ etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
+ gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
+ hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
+ kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
+ c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
+ c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
+ c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
+ c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
+ c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
+ c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
+ k_top,ibelm_moho_top,normal_moho,moho_kl_top,fluid_solid_boundary,NSPEC2D_MOHO)
- call compute_boundary_kernel_depth(displ_crust_mantle,accel_crust_mantle, &
- b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
- ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
- kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
- c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
- c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
- c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
- c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- k_bot,ibelm_moho_bot,normal_moho,moho_kl_bot,fluid_solid_boundary,NSPEC2D_MOHO)
+ call compute_boundary_kernel_depth(displ_crust_mantle,accel_crust_mantle,b_displ_crust_mantle, &
+ nspec_crust_mantle,iregion_code, &
+ ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
+ xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+ etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
+ gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
+ hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
+ kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
+ c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
+ c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
+ c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
+ c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
+ c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
+ c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
+ k_bot,ibelm_moho_bot,normal_moho,moho_kl_bot,fluid_solid_boundary,NSPEC2D_MOHO)
moho_kl = moho_kl + (moho_kl_top - moho_kl_bot) * deltat
endif
! 400
- call compute_boundary_kernel_depth(displ_crust_mantle,accel_crust_mantle, &
- b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
- ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
- kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
- c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
- c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
- c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
- c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- k_top,ibelm_400_top,normal_400,d400_kl_top,fluid_solid_boundary,NSPEC2D_400)
+ call compute_boundary_kernel_depth(displ_crust_mantle,accel_crust_mantle,b_displ_crust_mantle, &
+ nspec_crust_mantle,iregion_code, &
+ ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
+ xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+ etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
+ gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
+ hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
+ kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
+ c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
+ c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
+ c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
+ c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
+ c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
+ c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
+ k_top,ibelm_400_top,normal_400,d400_kl_top,fluid_solid_boundary,NSPEC2D_400)
- call compute_boundary_kernel_depth(displ_crust_mantle,accel_crust_mantle, &
- b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
- ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
- kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
- c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
- c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
- c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
- c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- k_bot,ibelm_400_bot,normal_400,d400_kl_bot,fluid_solid_boundary,NSPEC2D_400)
+ call compute_boundary_kernel_depth(displ_crust_mantle,accel_crust_mantle,b_displ_crust_mantle, &
+ nspec_crust_mantle,iregion_code, &
+ ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
+ xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+ etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
+ gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
+ hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
+ kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
+ c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
+ c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
+ c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
+ c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
+ c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
+ c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
+ k_bot,ibelm_400_bot,normal_400,d400_kl_bot,fluid_solid_boundary,NSPEC2D_400)
d400_kl = d400_kl + (d400_kl_top - d400_kl_bot) * deltat
! 670
- call compute_boundary_kernel_depth(displ_crust_mantle,accel_crust_mantle, &
- b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
- ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
- kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
- c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
- c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
- c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
- c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- k_top,ibelm_670_top,normal_670,d670_kl_top,fluid_solid_boundary,NSPEC2D_670)
+ call compute_boundary_kernel_depth(displ_crust_mantle,accel_crust_mantle,b_displ_crust_mantle, &
+ nspec_crust_mantle,iregion_code, &
+ ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
+ xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+ etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
+ gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
+ hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
+ kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
+ c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
+ c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
+ c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
+ c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
+ c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
+ c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
+ k_top,ibelm_670_top,normal_670,d670_kl_top,fluid_solid_boundary,NSPEC2D_670)
- call compute_boundary_kernel_depth(displ_crust_mantle,accel_crust_mantle, &
- b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
- ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
- kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
- c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
- c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
- c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
- c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- k_bot,ibelm_670_bot,normal_670,d670_kl_bot,fluid_solid_boundary,NSPEC2D_670)
+ call compute_boundary_kernel_depth(displ_crust_mantle,accel_crust_mantle,b_displ_crust_mantle, &
+ nspec_crust_mantle,iregion_code, &
+ ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
+ xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+ etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
+ gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
+ hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
+ kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
+ c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
+ c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
+ c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
+ c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
+ c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
+ c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
+ k_bot,ibelm_670_bot,normal_670,d670_kl_bot,fluid_solid_boundary,NSPEC2D_670)
d670_kl = d670_kl + (d670_kl_top - d670_kl_bot) * deltat
@@ -247,22 +283,23 @@
fluid_solid_boundary = .true.
iregion_code = IREGION_CRUST_MANTLE
- call compute_boundary_kernel_depth(displ_crust_mantle,accel_crust_mantle, &
- b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
- ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_crust_mantle,kappavstore_crust_mantle, muvstore_crust_mantle, &
- kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
- c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
- c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
- c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
- c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- k_top,ibelm_bottom_crust_mantle,normal_top_outer_core, &
- cmb_kl_top,fluid_solid_boundary,NSPEC2D_CMB)
+ call compute_boundary_kernel_depth(displ_crust_mantle,accel_crust_mantle,b_displ_crust_mantle, &
+ nspec_crust_mantle,iregion_code, &
+ ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
+ xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+ etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
+ gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
+ hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_crust_mantle,kappavstore_crust_mantle, muvstore_crust_mantle, &
+ kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
+ c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
+ c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
+ c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
+ c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
+ c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
+ c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
+ k_top,ibelm_bottom_crust_mantle,normal_top_outer_core, &
+ cmb_kl_top,fluid_solid_boundary,NSPEC2D_CMB)
iregion_code = IREGION_OUTER_CORE
@@ -270,43 +307,43 @@
allocate(dummy_ispec_is_tiso(NSPEC_OUTER_CORE))
dummy_ispec_is_tiso(:) = .false.
- call compute_boundary_kernel_depth(vector_displ_outer_core,vector_accel_outer_core, &
- b_vector_displ_outer_core,nspec_outer_core, &
- iregion_code,ystore_outer_core,zstore_outer_core,ibool_outer_core,dummy_ispec_is_tiso, &
- xix_outer_core,xiy_outer_core,xiz_outer_core, &
- etax_outer_core,etay_outer_core,etaz_outer_core,&
- gammax_outer_core,gammay_outer_core,gammaz_outer_core,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_outer_core,kappavstore_outer_core,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- k_bot,ibelm_top_outer_core,normal_top_outer_core, &
- cmb_kl_bot,fluid_solid_boundary,NSPEC2D_CMB)
+ call compute_boundary_kernel_depth(vector_displ_outer_core,vector_accel_outer_core,b_vector_displ_outer_core, &
+ nspec_outer_core, &
+ iregion_code,ystore_outer_core,zstore_outer_core,ibool_outer_core,dummy_ispec_is_tiso, &
+ xix_outer_core,xiy_outer_core,xiz_outer_core, &
+ etax_outer_core,etay_outer_core,etaz_outer_core,&
+ gammax_outer_core,gammay_outer_core,gammaz_outer_core,hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_outer_core,kappavstore_outer_core,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ k_bot,ibelm_top_outer_core,normal_top_outer_core, &
+ cmb_kl_bot,fluid_solid_boundary,NSPEC2D_CMB)
cmb_kl = cmb_kl + (cmb_kl_top - cmb_kl_bot) * deltat
! ICB
fluid_solid_boundary = .true.
- call compute_boundary_kernel_depth(vector_displ_outer_core,vector_accel_outer_core, &
- b_vector_displ_outer_core,nspec_outer_core, &
- iregion_code,ystore_outer_core,zstore_outer_core,ibool_outer_core,dummy_ispec_is_tiso, &
- xix_outer_core,xiy_outer_core,xiz_outer_core, &
- etax_outer_core,etay_outer_core,etaz_outer_core,&
- gammax_outer_core,gammay_outer_core,gammaz_outer_core,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_outer_core,kappavstore_outer_core,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- k_top,ibelm_bottom_outer_core,normal_bottom_outer_core, &
- icb_kl_top,fluid_solid_boundary,NSPEC2D_ICB)
+ call compute_boundary_kernel_depth(vector_displ_outer_core,vector_accel_outer_core,b_vector_displ_outer_core, &
+ nspec_outer_core, &
+ iregion_code,ystore_outer_core,zstore_outer_core,ibool_outer_core,dummy_ispec_is_tiso, &
+ xix_outer_core,xiy_outer_core,xiz_outer_core, &
+ etax_outer_core,etay_outer_core,etaz_outer_core,&
+ gammax_outer_core,gammay_outer_core,gammaz_outer_core,hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_outer_core,kappavstore_outer_core,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ k_top,ibelm_bottom_outer_core,normal_bottom_outer_core, &
+ icb_kl_top,fluid_solid_boundary,NSPEC2D_ICB)
deallocate(dummy_ispec_is_tiso)
@@ -317,22 +354,22 @@
allocate(dummy_ispec_is_tiso(NSPEC_INNER_CORE))
dummy_ispec_is_tiso(:) = .false.
- call compute_boundary_kernel_depth(displ_inner_core,accel_inner_core, &
- b_displ_inner_core,nspec_inner_core,iregion_code, &
- ystore_inner_core,zstore_inner_core,ibool_inner_core,dummy_ispec_is_tiso, &
- xix_inner_core,xiy_inner_core,xiz_inner_core, &
- etax_inner_core,etay_inner_core,etaz_inner_core,&
- gammax_inner_core,gammay_inner_core,gammaz_inner_core,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_inner_core,kappavstore_inner_core,muvstore_inner_core, &
- dummy_array,dummy_array,dummy_array, &
- c11store_inner_core,c12store_inner_core,c13store_inner_core,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array,dummy_array, &
- c33store_inner_core,dummy_array,dummy_array, &
- dummy_array,c44store_inner_core,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- k_bot,ibelm_top_inner_core,normal_bottom_outer_core, &
- icb_kl_bot,fluid_solid_boundary,NSPEC2D_ICB)
+ call compute_boundary_kernel_depth(displ_inner_core,accel_inner_core,b_displ_inner_core, &
+ nspec_inner_core,iregion_code, &
+ ystore_inner_core,zstore_inner_core,ibool_inner_core,dummy_ispec_is_tiso, &
+ xix_inner_core,xiy_inner_core,xiz_inner_core, &
+ etax_inner_core,etay_inner_core,etaz_inner_core,&
+ gammax_inner_core,gammay_inner_core,gammaz_inner_core,hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_inner_core,kappavstore_inner_core,muvstore_inner_core, &
+ dummy_array,dummy_array,dummy_array, &
+ c11store_inner_core,c12store_inner_core,c13store_inner_core,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array,dummy_array, &
+ c33store_inner_core,dummy_array,dummy_array, &
+ dummy_array,c44store_inner_core,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ k_bot,ibelm_top_inner_core,normal_bottom_outer_core, &
+ icb_kl_bot,fluid_solid_boundary,NSPEC2D_ICB)
deallocate(dummy_ispec_is_tiso)
icb_kl = icb_kl + (icb_kl_top - icb_kl_bot) * deltat
@@ -343,7 +380,8 @@
!-------------------------------------------------------------------------------------------------
!
- subroutine compute_boundary_kernel_depth(displ,accel,b_displ,nspec,iregion_code, &
+ subroutine compute_boundary_kernel_depth(displ,accel,b_displ, &
+ nspec,iregion_code, &
ystore,zstore,ibool,ispec_is_tiso, &
xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
hprime_xx,hprime_yy,hprime_zz, &
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_element.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_element.F90 2013-11-01 21:27:52 UTC (rev 22976)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_element.F90 2013-11-04 08:16:42 UTC (rev 22977)
@@ -82,7 +82,7 @@
! memory variables for attenuation
! memory variables R_ij are stored at the local rather than global level
! to allow for optimization of cache access by compiler
- real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUATION) :: R_xx,R_yy,R_xy,R_xz,R_yz
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_CRUST_MANTLE_ATTENUATION) :: R_xx,R_yy,R_xy,R_xz,R_yz
real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: epsilon_trace_over_3
@@ -96,8 +96,8 @@
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc,dummyy_loc,dummyz_loc
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: rho_s_H
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NDIM) :: rho_s_H
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,5) :: epsilondev_loc
! local parameters
real(kind=CUSTOM_REAL) one_minus_sum_beta_use
@@ -190,11 +190,11 @@
else
epsilon_trace_over_3(INDEX_IJK,ispec) = templ
endif
- epsilondev_loc(1,INDEX_IJK) = duxdxl - templ
- epsilondev_loc(2,INDEX_IJK) = duydyl - templ
- epsilondev_loc(3,INDEX_IJK) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl
- epsilondev_loc(4,INDEX_IJK) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl
- epsilondev_loc(5,INDEX_IJK) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl
+ epsilondev_loc(INDEX_IJK,1) = duxdxl - templ
+ epsilondev_loc(INDEX_IJK,2) = duydyl - templ
+ epsilondev_loc(INDEX_IJK,3) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl
+ epsilondev_loc(INDEX_IJK,4) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl
+ epsilondev_loc(INDEX_IJK,5) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl
endif
!
@@ -237,50 +237,47 @@
! here we assume that N_SLS == 3 in order to be able to unroll and suppress the loop
! in order to vectorize the outer loop
- R_xx_val = R_xx(1,INDEX_IJK,ispec)
- R_yy_val = R_yy(1,INDEX_IJK,ispec)
+ R_xx_val = R_xx(INDEX_IJK,1,ispec)
+ R_yy_val = R_yy(INDEX_IJK,1,ispec)
sigma_xx = sigma_xx - R_xx_val
sigma_yy = sigma_yy - R_yy_val
sigma_zz = sigma_zz + R_xx_val + R_yy_val
- sigma_xy = sigma_xy - R_xy(1,INDEX_IJK,ispec)
- sigma_xz = sigma_xz - R_xz(1,INDEX_IJK,ispec)
- sigma_yz = sigma_yz - R_yz(1,INDEX_IJK,ispec)
+ sigma_xy = sigma_xy - R_xy(INDEX_IJK,1,ispec)
+ sigma_xz = sigma_xz - R_xz(INDEX_IJK,1,ispec)
+ sigma_yz = sigma_yz - R_yz(INDEX_IJK,1,ispec)
- R_xx_val = R_xx(2,INDEX_IJK,ispec)
- R_yy_val = R_yy(2,INDEX_IJK,ispec)
+ R_xx_val = R_xx(INDEX_IJK,2,ispec)
+ R_yy_val = R_yy(INDEX_IJK,2,ispec)
sigma_xx = sigma_xx - R_xx_val
sigma_yy = sigma_yy - R_yy_val
sigma_zz = sigma_zz + R_xx_val + R_yy_val
- sigma_xy = sigma_xy - R_xy(2,INDEX_IJK,ispec)
- sigma_xz = sigma_xz - R_xz(2,INDEX_IJK,ispec)
- sigma_yz = sigma_yz - R_yz(2,INDEX_IJK,ispec)
+ sigma_xy = sigma_xy - R_xy(INDEX_IJK,2,ispec)
+ sigma_xz = sigma_xz - R_xz(INDEX_IJK,2,ispec)
+ sigma_yz = sigma_yz - R_yz(INDEX_IJK,2,ispec)
- R_xx_val = R_xx(3,INDEX_IJK,ispec)
- R_yy_val = R_yy(3,INDEX_IJK,ispec)
+ R_xx_val = R_xx(INDEX_IJK,3,ispec)
+ R_yy_val = R_yy(INDEX_IJK,3,ispec)
sigma_xx = sigma_xx - R_xx_val
sigma_yy = sigma_yy - R_yy_val
sigma_zz = sigma_zz + R_xx_val + R_yy_val
- sigma_xy = sigma_xy - R_xy(3,INDEX_IJK,ispec)
- sigma_xz = sigma_xz - R_xz(3,INDEX_IJK,ispec)
- sigma_yz = sigma_yz - R_yz(3,INDEX_IJK,ispec)
+ sigma_xy = sigma_xy - R_xy(INDEX_IJK,3,ispec)
+ sigma_xz = sigma_xz - R_xz(INDEX_IJK,3,ispec)
+ sigma_yz = sigma_yz - R_yz(INDEX_IJK,3,ispec)
#else
!daniel debug: att - debug update
! call compute_element_att_mem_up_cm(ispec,INDEX_IJK, &
-! R_xx(1,INDEX_IJK,ispec), &
-! R_yy(1,INDEX_IJK,ispec), &
-! R_xy(1,INDEX_IJK,ispec), &
-! R_xz(1,INDEX_IJK,ispec), &
-! R_yz(1,INDEX_IJK,ispec), &
-! epsilondev_loc(:,INDEX_IJK),muvstore(INDEX_IJK,ispec),is_backward_field)
+! R_xx(INDEX_IJK,1,ispec), &
+! R_yy(INDEX_IJK,1,ispec), &
+! R_xy(INDEX_IJK,1,ispec), &
+! R_xz(INDEX_IJK,1,ispec), &
+! R_yz(INDEX_IJK,1,ispec), &
+! epsilondev_loc(INDEX_IJK,:),muvstore(INDEX_IJK,ispec),is_backward_field)
! note: function inlining is generally done by fortran compilers;
! compilers decide based on performance heuristics
! note: fortran passes pointers to array location, thus R_memory(1,1,...) should be fine
- call compute_element_att_stress(R_xx(1,INDEX_IJK,ispec), &
- R_yy(1,INDEX_IJK,ispec), &
- R_xy(1,INDEX_IJK,ispec), &
- R_xz(1,INDEX_IJK,ispec), &
- R_yz(1,INDEX_IJK,ispec), &
+ call compute_element_att_stress(i,j,k,R_xx(1,1,1,1,ispec),R_yy(1,1,1,1,ispec),R_xy(1,1,1,1,ispec), &
+ R_xz(1,1,1,1,ispec),R_yz(1,1,1,1,ispec), &
sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz)
#endif
@@ -361,9 +358,9 @@
! precompute vector
factor = dble(jacobianl) * wgll_cube(INDEX_IJK)
- rho_s_H(1,INDEX_IJK) = sngl(factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl))
- rho_s_H(2,INDEX_IJK) = sngl(factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl))
- rho_s_H(3,INDEX_IJK) = sngl(factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl))
+ rho_s_H(INDEX_IJK,1) = sngl(factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl))
+ rho_s_H(INDEX_IJK,2) = sngl(factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl))
+ rho_s_H(INDEX_IJK,3) = sngl(factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl))
else
@@ -388,9 +385,9 @@
! precompute vector
factor = jacobianl * wgll_cube(INDEX_IJK)
- rho_s_H(1,INDEX_IJK) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
- rho_s_H(2,INDEX_IJK) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
- rho_s_H(3,INDEX_IJK) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
+ rho_s_H(INDEX_IJK,1) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
+ rho_s_H(INDEX_IJK,2) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
+ rho_s_H(INDEX_IJK,3) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
endif
@@ -466,7 +463,7 @@
! memory variables for attenuation
! memory variables R_ij are stored at the local rather than global level
! to allow for optimization of cache access by compiler
- real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUATION) :: R_xx,R_yy,R_xy,R_xz,R_yz
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_CRUST_MANTLE_ATTENUATION) :: R_xx,R_yy,R_xy,R_xz,R_yz
real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: epsilon_trace_over_3
@@ -484,8 +481,8 @@
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc,dummyy_loc,dummyz_loc
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: rho_s_H
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NDIM) :: rho_s_H
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,5) :: epsilondev_loc
! local parameters
real(kind=CUSTOM_REAL) one_minus_sum_beta_use
@@ -591,11 +588,11 @@
else
epsilon_trace_over_3(INDEX_IJK,ispec) = templ
endif
- epsilondev_loc(1,INDEX_IJK) = duxdxl - templ
- epsilondev_loc(2,INDEX_IJK) = duydyl - templ
- epsilondev_loc(3,INDEX_IJK) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl
- epsilondev_loc(4,INDEX_IJK) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl
- epsilondev_loc(5,INDEX_IJK) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl
+ epsilondev_loc(INDEX_IJK,1) = duxdxl - templ
+ epsilondev_loc(INDEX_IJK,2) = duydyl - templ
+ epsilondev_loc(INDEX_IJK,3) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl
+ epsilondev_loc(INDEX_IJK,4) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl
+ epsilondev_loc(INDEX_IJK,5) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl
endif
!
@@ -822,41 +819,38 @@
! here we assume that N_SLS == 3 in order to be able to unroll and suppress the loop
! in order to vectorize the outer loop
- R_xx_val = R_xx(1,INDEX_IJK,ispec)
- R_yy_val = R_yy(1,INDEX_IJK,ispec)
+ R_xx_val = R_xx(INDEX_IJK,1,ispec)
+ R_yy_val = R_yy(INDEX_IJK,1,ispec)
sigma_xx = sigma_xx - R_xx_val
sigma_yy = sigma_yy - R_yy_val
sigma_zz = sigma_zz + R_xx_val + R_yy_val
- sigma_xy = sigma_xy - R_xy(1,INDEX_IJK,ispec)
- sigma_xz = sigma_xz - R_xz(1,INDEX_IJK,ispec)
- sigma_yz = sigma_yz - R_yz(1,INDEX_IJK,ispec)
+ sigma_xy = sigma_xy - R_xy(INDEX_IJK,1,ispec)
+ sigma_xz = sigma_xz - R_xz(INDEX_IJK,1,ispec)
+ sigma_yz = sigma_yz - R_yz(INDEX_IJK,1,ispec)
- R_xx_val = R_xx(2,INDEX_IJK,ispec)
- R_yy_val = R_yy(2,INDEX_IJK,ispec)
+ R_xx_val = R_xx(INDEX_IJK,2,ispec)
+ R_yy_val = R_yy(INDEX_IJK,2,ispec)
sigma_xx = sigma_xx - R_xx_val
sigma_yy = sigma_yy - R_yy_val
sigma_zz = sigma_zz + R_xx_val + R_yy_val
- sigma_xy = sigma_xy - R_xy(2,INDEX_IJK,ispec)
- sigma_xz = sigma_xz - R_xz(2,INDEX_IJK,ispec)
- sigma_yz = sigma_yz - R_yz(2,INDEX_IJK,ispec)
+ sigma_xy = sigma_xy - R_xy(INDEX_IJK,2,ispec)
+ sigma_xz = sigma_xz - R_xz(INDEX_IJK,2,ispec)
+ sigma_yz = sigma_yz - R_yz(INDEX_IJK,2,ispec)
- R_xx_val = R_xx(3,ijk,1,1,ispec)
- R_yy_val = R_yy(3,ijk,1,1,ispec)
+ R_xx_val = R_xx(INDEX_IJK,3,ispec)
+ R_yy_val = R_yy(INDEX_IJK,3,ispec)
sigma_xx = sigma_xx - R_xx_val
sigma_yy = sigma_yy - R_yy_val
sigma_zz = sigma_zz + R_xx_val + R_yy_val
- sigma_xy = sigma_xy - R_xy(3,INDEX_IJK,ispec)
- sigma_xz = sigma_xz - R_xz(3,INDEX_IJK,ispec)
- sigma_yz = sigma_yz - R_yz(3,INDEX_IJK,ispec)
+ sigma_xy = sigma_xy - R_xy(INDEX_IJK,3,ispec)
+ sigma_xz = sigma_xz - R_xz(INDEX_IJK,3,ispec)
+ sigma_yz = sigma_yz - R_yz(INDEX_IJK,3,ispec)
#else
! note: function inlining is generally done by fortran compilers;
! compilers decide based on performance heuristics
! note: fortran passes pointers to array location, thus R_memory(1,1,...) is fine
- call compute_element_att_stress(R_xx(1,INDEX_IJK,ispec), &
- R_yy(1,INDEX_IJK,ispec), &
- R_xy(1,INDEX_IJK,ispec), &
- R_xz(1,INDEX_IJK,ispec), &
- R_yz(1,INDEX_IJK,ispec), &
+ call compute_element_att_stress(i,j,k,R_xx(1,1,1,1,ispec),R_yy(1,1,1,1,ispec),R_xy(1,1,1,1,ispec), &
+ R_xz(1,1,1,1,ispec),R_yz(1,1,1,1,ispec), &
sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz)
#endif
@@ -936,9 +930,9 @@
! precompute vector
factor = dble(jacobianl) * wgll_cube(INDEX_IJK)
- rho_s_H(1,INDEX_IJK) = sngl(factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl))
- rho_s_H(2,INDEX_IJK) = sngl(factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl))
- rho_s_H(3,INDEX_IJK) = sngl(factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl))
+ rho_s_H(INDEX_IJK,1) = sngl(factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl))
+ rho_s_H(INDEX_IJK,2) = sngl(factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl))
+ rho_s_H(INDEX_IJK,3) = sngl(factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl))
else
@@ -963,9 +957,9 @@
! precompute vector
factor = jacobianl * wgll_cube(INDEX_IJK)
- rho_s_H(1,INDEX_IJK) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
- rho_s_H(2,INDEX_IJK) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
- rho_s_H(3,INDEX_IJK) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
+ rho_s_H(INDEX_IJK,1) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
+ rho_s_H(INDEX_IJK,2) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
+ rho_s_H(INDEX_IJK,3) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
endif
@@ -1041,7 +1035,7 @@
! memory variables for attenuation
! memory variables R_ij are stored at the local rather than global level
! to allow for optimization of cache access by compiler
- real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUATION) :: R_xx,R_yy,R_xy,R_xz,R_yz
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_CRUST_MANTLE_ATTENUATION) :: R_xx,R_yy,R_xy,R_xz,R_yz
real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: epsilon_trace_over_3
@@ -1059,8 +1053,8 @@
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc,dummyy_loc,dummyz_loc
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: rho_s_H
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NDIM) :: rho_s_H
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,5) :: epsilondev_loc
! local parameters
real(kind=CUSTOM_REAL) minus_sum_beta,mul
@@ -1154,11 +1148,11 @@
else
epsilon_trace_over_3(INDEX_IJK,ispec) = templ
endif
- epsilondev_loc(1,INDEX_IJK) = duxdxl - templ
- epsilondev_loc(2,INDEX_IJK) = duydyl - templ
- epsilondev_loc(3,INDEX_IJK) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl
- epsilondev_loc(4,INDEX_IJK) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl
- epsilondev_loc(5,INDEX_IJK) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl
+ epsilondev_loc(INDEX_IJK,1) = duxdxl - templ
+ epsilondev_loc(INDEX_IJK,2) = duydyl - templ
+ epsilondev_loc(INDEX_IJK,3) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl
+ epsilondev_loc(INDEX_IJK,4) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl
+ epsilondev_loc(INDEX_IJK,5) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl
endif
!
@@ -1232,42 +1226,39 @@
! here we assume that N_SLS == 3 in order to be able to unroll and suppress the loop
! in order to vectorize the outer loop
- R_xx_val = R_xx(1,INDEX_IJK,ispec)
- R_yy_val = R_yy(1,INDEX_IJK,ispec)
+ R_xx_val = R_xx(INDEX_IJK,1,ispec)
+ R_yy_val = R_yy(INDEX_IJK,1,ispec)
sigma_xx = sigma_xx - R_xx_val
sigma_yy = sigma_yy - R_yy_val
sigma_zz = sigma_zz + R_xx_val + R_yy_val
- sigma_xy = sigma_xy - R_xy(1,INDEX_IJK,ispec)
- sigma_xz = sigma_xz - R_xz(1,INDEX_IJK,ispec)
- sigma_yz = sigma_yz - R_yz(1,INDEX_IJK,ispec)
+ sigma_xy = sigma_xy - R_xy(INDEX_IJK,1,ispec)
+ sigma_xz = sigma_xz - R_xz(INDEX_IJK,1,ispec)
+ sigma_yz = sigma_yz - R_yz(INDEX_IJK,1,ispec)
- R_xx_val = R_xx(2,INDEX_IJK,ispec)
- R_yy_val = R_yy(2,INDEX_IJK,ispec)
+ R_xx_val = R_xx(INDEX_IJK,2,ispec)
+ R_yy_val = R_yy(INDEX_IJK,2,ispec)
sigma_xx = sigma_xx - R_xx_val
sigma_yy = sigma_yy - R_yy_val
sigma_zz = sigma_zz + R_xx_val + R_yy_val
- sigma_xy = sigma_xy - R_xy(2,INDEX_IJK,ispec)
- sigma_xz = sigma_xz - R_xz(2,INDEX_IJK,ispec)
- sigma_yz = sigma_yz - R_yz(2,INDEX_IJK,ispec)
+ sigma_xy = sigma_xy - R_xy(INDEX_IJK,2,ispec)
+ sigma_xz = sigma_xz - R_xz(INDEX_IJK,2,ispec)
+ sigma_yz = sigma_yz - R_yz(INDEX_IJK,2,ispec)
- R_xx_val = R_xx(3,INDEX_IJK,ispec)
- R_yy_val = R_yy(3,INDEX_IJK,ispec)
+ R_xx_val = R_xx(INDEX_IJK,3,ispec)
+ R_yy_val = R_yy(INDEX_IJK,3,ispec)
sigma_xx = sigma_xx - R_xx_val
sigma_yy = sigma_yy - R_yy_val
sigma_zz = sigma_zz + R_xx_val + R_yy_val
- sigma_xy = sigma_xy - R_xy(3,INDEX_IJK,ispec)
- sigma_xz = sigma_xz - R_xz(3,INDEX_IJK,ispec)
- sigma_yz = sigma_yz - R_yz(3,INDEX_IJK,ispec)
+ sigma_xy = sigma_xy - R_xy(INDEX_IJK,3,ispec)
+ sigma_xz = sigma_xz - R_xz(INDEX_IJK,3,ispec)
+ sigma_yz = sigma_yz - R_yz(INDEX_IJK,3,ispec)
#else
! note: function inlining is generally done by fortran compilers;
! compilers decide based on performance heuristics
! note: Fortran passes pointers to array location, thus R_memory(1,1,...) is fine
- call compute_element_att_stress(R_xx(1,INDEX_IJK,ispec), &
- R_yy(1,INDEX_IJK,ispec), &
- R_xy(1,INDEX_IJK,ispec), &
- R_xz(1,INDEX_IJK,ispec), &
- R_yz(1,INDEX_IJK,ispec), &
+ call compute_element_att_stress(i,j,k,R_xx(1,1,1,1,ispec),R_yy(1,1,1,1,ispec),R_xy(1,1,1,1,ispec), &
+ R_xz(1,1,1,1,ispec),R_yz(1,1,1,1,ispec), &
sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz)
#endif
endif ! ATTENUATION_VAL
@@ -1346,9 +1337,9 @@
! precompute vector
factor = dble(jacobianl) * wgll_cube(INDEX_IJK)
- rho_s_H(1,INDEX_IJK) = sngl(factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl))
- rho_s_H(2,INDEX_IJK) = sngl(factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl))
- rho_s_H(3,INDEX_IJK) = sngl(factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl))
+ rho_s_H(INDEX_IJK,1) = sngl(factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl))
+ rho_s_H(INDEX_IJK,2) = sngl(factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl))
+ rho_s_H(INDEX_IJK,3) = sngl(factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl))
else
@@ -1373,9 +1364,9 @@
! precompute vector
factor = jacobianl * wgll_cube(INDEX_IJK)
- rho_s_H(1,INDEX_IJK) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
- rho_s_H(2,INDEX_IJK) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
- rho_s_H(3,INDEX_IJK) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
+ rho_s_H(INDEX_IJK,1) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
+ rho_s_H(INDEX_IJK,2) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
+ rho_s_H(INDEX_IJK,3) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
endif
@@ -1410,44 +1401,579 @@
!
- subroutine compute_element_att_stress(R_xx_loc,R_yy_loc,R_xy_loc,R_xz_loc,R_yz_loc, &
+ subroutine compute_element_att_stress(i,j,k,R_xx_loc,R_yy_loc,R_xy_loc,R_xz_loc,R_yz_loc, &
sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz)
! updates stress with attenuation correction
- use constants_solver,only: CUSTOM_REAL,N_SLS
+ use constants_solver,only: CUSTOM_REAL,N_SLS,NGLLX,NGLLY,NGLLZ
implicit none
+ integer, intent(in) :: i,j,k
! attenuation
! memory variables for attenuation
! memory variables R_ij are stored at the local rather than global level
! to allow for optimization of cache access by compiler
-! real(kind=CUSTOM_REAL), dimension(5,N_SLS) :: R_memory_loc
- real(kind=CUSTOM_REAL), dimension(N_SLS) :: R_xx_loc
- real(kind=CUSTOM_REAL), dimension(N_SLS) :: R_yy_loc
- real(kind=CUSTOM_REAL), dimension(N_SLS) :: R_xy_loc
- real(kind=CUSTOM_REAL), dimension(N_SLS) :: R_xz_loc
- real(kind=CUSTOM_REAL), dimension(N_SLS) :: R_yz_loc
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS) :: R_xx_loc
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS) :: R_yy_loc
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS) :: R_xy_loc
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS) :: R_xz_loc
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS) :: R_yz_loc
real(kind=CUSTOM_REAL) sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz
! local parameters
- real(kind=CUSTOM_REAL) R_xx_val1,R_yy_val1
+ real(kind=CUSTOM_REAL) R_xx_val,R_yy_val
integer :: i_SLS
do i_SLS = 1,N_SLS
- R_xx_val1 = R_xx_loc(i_SLS) ! R_memory(1,i_SLS,i,j,k,ispec)
- R_yy_val1 = R_yy_loc(i_SLS) ! R_memory(2,i_SLS,i,j,k,ispec)
- sigma_xx = sigma_xx - R_xx_val1
- sigma_yy = sigma_yy - R_yy_val1
- sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1
- sigma_xy = sigma_xy - R_xy_loc(i_SLS) ! R_memory(3,i_SLS,i,j,k,ispec)
- sigma_xz = sigma_xz - R_xz_loc(i_SLS) ! R_memory(4,i_SLS,i,j,k,ispec)
- sigma_yz = sigma_yz - R_yz_loc(i_SLS) ! R_memory(5,i_SLS,i,j,k,ispec)
+ R_xx_val = R_xx_loc(i,j,k,i_SLS)
+ R_yy_val = R_yy_loc(i,j,k,i_SLS)
+ sigma_xx = sigma_xx - R_xx_val
+ sigma_yy = sigma_yy - R_yy_val
+ sigma_zz = sigma_zz + R_xx_val + R_yy_val
+ sigma_xy = sigma_xy - R_xy_loc(i,j,k,i_SLS)
+ sigma_xz = sigma_xz - R_xz_loc(i,j,k,i_SLS)
+ sigma_yz = sigma_yz - R_yz_loc(i,j,k,i_SLS)
enddo
end subroutine compute_element_att_stress
+!--------------------------------------------------------------------------------------------
+!
+! Deville et al. 2002
+! Higher-Order Methods for Incompressible Fluid Flow
+!
+! subroutines adapted from Deville, Fischer and Mund, High-order methods
+! for incompressible fluid flow, Cambridge University Press (2002),
+! pages 386 and 389 and Figure 8.3.1
+!
+!--------------------------------------------------------------------------------------------
+! matrix - matrix multiplications
+
+! single component routines
+
+ subroutine mxm(A,n1,B,n2,C,n3)
+
+ use constants_solver,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n2,n3
+ real(kind=CUSTOM_REAL),dimension(n1,n2) :: A
+ real(kind=CUSTOM_REAL),dimension(n2,n3) :: B
+ real(kind=CUSTOM_REAL),dimension(n1,n3) :: C
+
+ ! chooses optimized version
+ select case( n2 )
+
+ case( 4 )
+ call mxm4(A,n1,B,C,n3)
+
+ case( 5 )
+ call mxm5(A,n1,B,C,n3)
+
+ case( 6 )
+ call mxm6(A,n1,B,C,n3)
+
+ case( 7 )
+ call mxm7(A,n1,B,C,n3)
+
+ case( 8 )
+ call mxm8(A,n1,B,C,n3)
+
+ case default
+ call mxmN(A,n1,B,n2,C,n3)
+
+ end select
+
+ end subroutine mxm
+
+!--------------------------------------------------------------------------------------------
+
+ subroutine mxm4(A,n1,B,C,n3)
+
+ use constants_solver,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n3
+ real(kind=CUSTOM_REAL),dimension(n1,4) :: A
+ real(kind=CUSTOM_REAL),dimension(4,n3) :: B
+ real(kind=CUSTOM_REAL),dimension(n1,n3) :: C
+
+ ! local parameters
+ integer :: i,j
+
+ ! matrix-matrix multiplication
+ do j=1,n3
+ do i=1,n1
+ C(i,j) = A(i,1) * B(1,j) &
+ + A(i,2) * B(2,j) &
+ + A(i,3) * B(3,j) &
+ + A(i,4) * B(4,j)
+ enddo
+ enddo
+
+ end subroutine mxm4
+
+!--------------------------------------------------------------------------------------------
+
+ subroutine mxm5(A,n1,B,C,n3)
+
+ use constants_solver,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n3
+ real(kind=CUSTOM_REAL),dimension(n1,5) :: A
+ real(kind=CUSTOM_REAL),dimension(5,n3) :: B
+ real(kind=CUSTOM_REAL),dimension(n1,n3) :: C
+
+ ! local parameters
+ integer :: i,j
+
+ ! matrix-matrix multiplication
+ do j=1,n3
+ do i=1,n1
+ C(i,j) = A(i,1) * B(1,j) &
+ + A(i,2) * B(2,j) &
+ + A(i,3) * B(3,j) &
+ + A(i,4) * B(4,j) &
+ + A(i,5) * B(5,j)
+ enddo
+ enddo
+
+ end subroutine mxm5
+
+!--------------------------------------------------------------------------------------------
+
+ subroutine mxm6(A,n1,B,C,n3)
+
+ use constants_solver,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n3
+ real(kind=CUSTOM_REAL),dimension(n1,6) :: A
+ real(kind=CUSTOM_REAL),dimension(6,n3) :: B
+ real(kind=CUSTOM_REAL),dimension(n1,n3) :: C
+
+ ! local parameters
+ integer :: i,j
+
+ ! matrix-matrix multiplication
+ do j=1,n3
+ do i=1,n1
+ C(i,j) = A(i,1) * B(1,j) &
+ + A(i,2) * B(2,j) &
+ + A(i,3) * B(3,j) &
+ + A(i,4) * B(4,j) &
+ + A(i,5) * B(5,j) &
+ + A(i,6) * B(6,j)
+ enddo
+ enddo
+
+ end subroutine mxm6
+
+!--------------------------------------------------------------------------------------------
+
+ subroutine mxm7(A,n1,B,C,n3)
+
+ use constants_solver,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n3
+ real(kind=CUSTOM_REAL),dimension(n1,7) :: A
+ real(kind=CUSTOM_REAL),dimension(7,n3) :: B
+ real(kind=CUSTOM_REAL),dimension(n1,n3) :: C
+
+ ! local parameters
+ integer :: i,j
+
+ ! matrix-matrix multiplication
+ do j=1,n3
+ do i=1,n1
+ C(i,j) = A(i,1) * B(1,j) &
+ + A(i,2) * B(2,j) &
+ + A(i,3) * B(3,j) &
+ + A(i,4) * B(4,j) &
+ + A(i,5) * B(5,j) &
+ + A(i,6) * B(6,j) &
+ + A(i,7) * B(7,j)
+ enddo
+ enddo
+
+ end subroutine mxm7
+
+
+!--------------------------------------------------------------------------------------------
+
+ subroutine mxm8(A,n1,B,C,n3)
+
+ use constants_solver,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n3
+ real(kind=CUSTOM_REAL),dimension(n1,8) :: A
+ real(kind=CUSTOM_REAL),dimension(8,n3) :: B
+ real(kind=CUSTOM_REAL),dimension(n1,n3) :: C
+
+ ! local parameters
+ integer :: i,j
+
+ ! matrix-matrix multiplication
+ do j=1,n3
+ do i=1,n1
+ C(i,j) = A(i,1) * B(1,j) &
+ + A(i,2) * B(2,j) &
+ + A(i,3) * B(3,j) &
+ + A(i,4) * B(4,j) &
+ + A(i,5) * B(5,j) &
+ + A(i,6) * B(6,j) &
+ + A(i,7) * B(7,j) &
+ + A(i,8) * B(8,j)
+ enddo
+ enddo
+
+ end subroutine mxm8
+
+
+!--------------------------------------------------------------------------------------------
+
+ subroutine mxmN(A,n1,B,n2,C,n3)
+
+ use constants_solver,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n2,n3
+ real(kind=CUSTOM_REAL),dimension(n1,n2) :: A
+ real(kind=CUSTOM_REAL),dimension(n2,n3) :: B
+ real(kind=CUSTOM_REAL),dimension(n1,n3) :: C
+
+ ! local parameters
+ integer :: i,j,k
+ real(kind=CUSTOM_REAL) :: tmp
+
+ ! general matrix-matrix multiplication
+ do j=1,n3
+ do k=1,n2
+ tmp = B(k,j)
+ do i=1,n1
+ C(i,j) = C(i,j) + A(i,k) * tmp
+ enddo
+ enddo
+ enddo
+
+ end subroutine mxmN
+
+
+!----------------------------------------------------------------------------------------------
+
+
+
+! 3-component routines: combines arrays A1,A2,A3 which correspond to 3 different components x/y/z
+
+ subroutine mxm_3comp(A1,A2,A3,n1,B1,B2,B3,n2,C1,C2,C3,n3)
+
+ use constants_solver,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n2,n3
+ real(kind=CUSTOM_REAL),dimension(n1,n2) :: A1,A2,A3
+ real(kind=CUSTOM_REAL),dimension(n2,n3) :: B1,B2,B3
+ real(kind=CUSTOM_REAL),dimension(n1,n3) :: C1,C2,C3
+
+ ! chooses optimized version
+ select case( n2 )
+
+ case( 4 )
+ call mxm4_3comp(A1,A2,A3,n1,B1,B2,B3,C1,C2,C3,n3)
+
+ case( 5 )
+ call mxm5_3comp(A1,A2,A3,n1,B1,B2,B3,C1,C2,C3,n3)
+
+ case( 6 )
+ call mxm6_3comp(A1,A2,A3,n1,B1,B2,B3,C1,C2,C3,n3)
+
+ case( 7 )
+ call mxm7_3comp(A1,A2,A3,n1,B1,B2,B3,C1,C2,C3,n3)
+
+ case( 8 )
+ call mxm8_3comp(A1,A2,A3,n1,B1,B2,B3,C1,C2,C3,n3)
+
+ case default
+ call mxmN_3comp(A1,A2,A3,n1,B1,B2,B3,n2,C1,C2,C3,n3)
+
+ end select
+
+ end subroutine mxm_3comp
+
+!--------------------------------------------------------------------------------------------
+
+ subroutine mxm4_3comp(A1,A2,A3,n1,B1,B2,B3,C1,C2,C3,n3)
+
+ use constants_solver,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n3
+ real(kind=CUSTOM_REAL),dimension(n1,4) :: A1,A2,A3
+ real(kind=CUSTOM_REAL),dimension(4,n3) :: B1,B2,B3
+ real(kind=CUSTOM_REAL),dimension(n1,n3) :: C1,C2,C3
+
+ ! local parameters
+ integer :: i,j
+
+ ! matrix-matrix multiplication
+ do j=1,n3
+ do i=1,n1
+ C1(i,j) = A1(i,1) * B1(1,j) &
+ + A1(i,2) * B1(2,j) &
+ + A1(i,3) * B1(3,j) &
+ + A1(i,4) * B1(4,j)
+
+ C2(i,j) = A2(i,1) * B2(1,j) &
+ + A2(i,2) * B2(2,j) &
+ + A2(i,3) * B2(3,j) &
+ + A2(i,4) * B2(4,j)
+
+ C3(i,j) = A3(i,1) * B3(1,j) &
+ + A3(i,2) * B3(2,j) &
+ + A3(i,3) * B3(3,j) &
+ + A3(i,4) * B3(4,j)
+ enddo
+ enddo
+
+ end subroutine mxm4_3comp
+
+!--------------------------------------------------------------------------------------------
+
+ subroutine mxm5_3comp(A1,A2,A3,n1,B1,B2,B3,C1,C2,C3,n3)
+
+ use constants_solver,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n3
+ real(kind=CUSTOM_REAL),dimension(n1,5) :: A1,A2,A3
+ real(kind=CUSTOM_REAL),dimension(5,n3) :: B1,B2,B3
+ real(kind=CUSTOM_REAL),dimension(n1,n3) :: C1,C2,C3
+
+ ! local parameters
+ integer :: i,j
+
+ ! matrix-matrix multiplication
+ do j=1,n3
+ do i=1,n1
+ C1(i,j) = A1(i,1) * B1(1,j) &
+ + A1(i,2) * B1(2,j) &
+ + A1(i,3) * B1(3,j) &
+ + A1(i,4) * B1(4,j) &
+ + A1(i,5) * B1(5,j)
+
+ C2(i,j) = A2(i,1) * B2(1,j) &
+ + A2(i,2) * B2(2,j) &
+ + A2(i,3) * B2(3,j) &
+ + A2(i,4) * B2(4,j) &
+ + A2(i,5) * B2(5,j)
+
+ C3(i,j) = A3(i,1) * B3(1,j) &
+ + A3(i,2) * B3(2,j) &
+ + A3(i,3) * B3(3,j) &
+ + A3(i,4) * B3(4,j) &
+ + A3(i,5) * B3(5,j)
+ enddo
+ enddo
+
+ end subroutine mxm5_3comp
+
+
+
+!--------------------------------------------------------------------------------------------
+
+ subroutine mxm6_3comp(A1,A2,A3,n1,B1,B2,B3,C1,C2,C3,n3)
+
+ use constants_solver,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n3
+ real(kind=CUSTOM_REAL),dimension(n1,6) :: A1,A2,A3
+ real(kind=CUSTOM_REAL),dimension(6,n3) :: B1,B2,B3
+ real(kind=CUSTOM_REAL),dimension(n1,n3) :: C1,C2,C3
+
+ ! local parameters
+ integer :: i,j
+
+ ! matrix-matrix multiplication
+ do j=1,n3
+ do i=1,n1
+ C1(i,j) = A1(i,1) * B1(1,j) &
+ + A1(i,2) * B1(2,j) &
+ + A1(i,3) * B1(3,j) &
+ + A1(i,4) * B1(4,j) &
+ + A1(i,5) * B1(5,j) &
+ + A1(i,6) * B1(6,j)
+
+ C2(i,j) = A2(i,1) * B2(1,j) &
+ + A2(i,2) * B2(2,j) &
+ + A2(i,3) * B2(3,j) &
+ + A2(i,4) * B2(4,j) &
+ + A2(i,5) * B2(5,j) &
+ + A2(i,6) * B2(6,j)
+
+ C3(i,j) = A3(i,1) * B3(1,j) &
+ + A3(i,2) * B3(2,j) &
+ + A3(i,3) * B3(3,j) &
+ + A3(i,4) * B3(4,j) &
+ + A3(i,5) * B3(5,j) &
+ + A3(i,6) * B3(6,j)
+ enddo
+ enddo
+
+ end subroutine mxm6_3comp
+
+!--------------------------------------------------------------------------------------------
+
+ subroutine mxm7_3comp(A1,A2,A3,n1,B1,B2,B3,C1,C2,C3,n3)
+
+ use constants_solver,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n3
+ real(kind=CUSTOM_REAL),dimension(n1,7) :: A1,A2,A3
+ real(kind=CUSTOM_REAL),dimension(7,n3) :: B1,B2,B3
+ real(kind=CUSTOM_REAL),dimension(n1,n3) :: C1,C2,C3
+
+ ! local parameters
+ integer :: i,j
+
+ ! matrix-matrix multiplication
+ do j=1,n3
+ do i=1,n1
+ C1(i,j) = A1(i,1) * B1(1,j) &
+ + A1(i,2) * B1(2,j) &
+ + A1(i,3) * B1(3,j) &
+ + A1(i,4) * B1(4,j) &
+ + A1(i,5) * B1(5,j) &
+ + A1(i,6) * B1(6,j) &
+ + A1(i,7) * B1(7,j)
+
+ C2(i,j) = A2(i,1) * B2(1,j) &
+ + A2(i,2) * B2(2,j) &
+ + A2(i,3) * B2(3,j) &
+ + A2(i,4) * B2(4,j) &
+ + A2(i,5) * B2(5,j) &
+ + A2(i,6) * B2(6,j) &
+ + A2(i,7) * B2(7,j)
+
+ C3(i,j) = A3(i,1) * B3(1,j) &
+ + A3(i,2) * B3(2,j) &
+ + A3(i,3) * B3(3,j) &
+ + A3(i,4) * B3(4,j) &
+ + A3(i,5) * B3(5,j) &
+ + A3(i,6) * B3(6,j) &
+ + A3(i,7) * B3(7,j)
+ enddo
+ enddo
+
+ end subroutine mxm7_3comp
+
+
+!--------------------------------------------------------------------------------------------
+
+ subroutine mxm8_3comp(A1,A2,A3,n1,B1,B2,B3,C1,C2,C3,n3)
+
+ use constants_solver,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n3
+ real(kind=CUSTOM_REAL),dimension(n1,8) :: A1,A2,A3
+ real(kind=CUSTOM_REAL),dimension(8,n3) :: B1,B2,B3
+ real(kind=CUSTOM_REAL),dimension(n1,n3) :: C1,C2,C3
+
+ ! local parameters
+ integer :: i,j
+
+ ! matrix-matrix multiplication
+ do j=1,n3
+ do i=1,n1
+ C1(i,j) = A1(i,1) * B1(1,j) &
+ + A1(i,2) * B1(2,j) &
+ + A1(i,3) * B1(3,j) &
+ + A1(i,4) * B1(4,j) &
+ + A1(i,5) * B1(5,j) &
+ + A1(i,6) * B1(6,j) &
+ + A1(i,7) * B1(7,j) &
+ + A1(i,8) * B1(8,j)
+
+ C2(i,j) = A2(i,1) * B2(1,j) &
+ + A2(i,2) * B2(2,j) &
+ + A2(i,3) * B2(3,j) &
+ + A2(i,4) * B2(4,j) &
+ + A2(i,5) * B2(5,j) &
+ + A2(i,6) * B2(6,j) &
+ + A2(i,7) * B2(7,j) &
+ + A2(i,8) * B2(8,j)
+
+ C3(i,j) = A3(i,1) * B3(1,j) &
+ + A3(i,2) * B3(2,j) &
+ + A3(i,3) * B3(3,j) &
+ + A3(i,4) * B3(4,j) &
+ + A3(i,5) * B3(5,j) &
+ + A3(i,6) * B3(6,j) &
+ + A3(i,7) * B3(7,j) &
+ + A3(i,8) * B3(8,j)
+ enddo
+ enddo
+
+ end subroutine mxm8_3comp
+
+
+!--------------------------------------------------------------------------------------------
+
+ subroutine mxmN_3comp(A1,A2,A3,n1,B1,B2,B3,n2,C1,C2,C3,n3)
+
+ use constants_solver,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n2,n3
+ real(kind=CUSTOM_REAL),dimension(n1,n2) :: A1,A2,A3
+ real(kind=CUSTOM_REAL),dimension(n2,n3) :: B1,B2,B3
+ real(kind=CUSTOM_REAL),dimension(n1,n3) :: C1,C2,C3
+
+ ! local parameters
+ integer :: i,j,k
+ real(kind=CUSTOM_REAL) :: tmp1,tmp2,tmp3
+
+ ! general matrix-matrix multiplication
+ do j=1,n3
+ do k=1,n2
+ tmp1 = B1(k,j)
+ tmp2 = B2(k,j)
+ tmp3 = B3(k,j)
+ do i=1,n1
+ C1(i,j) = C1(i,j) + A1(i,k) * tmp1
+ C2(i,j) = C2(i,j) + A2(i,k) * tmp2
+ C3(i,j) = C3(i,j) + A3(i,k) * tmp3
+ enddo
+ enddo
+ enddo
+
+ end subroutine mxmN_3comp
+
+
+
+
+
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_element_att_memory.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_element_att_memory.F90 2013-11-01 21:27:52 UTC (rev 22976)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_element_att_memory.F90 2013-11-04 08:16:42 UTC (rev 22977)
@@ -25,7 +25,12 @@
!
!=====================================================================
+! we switch between vectorized and non-vectorized version by using pre-processor flag FORCE_VECTORIZATION
+! and macros INDEX_IJK, DO_LOOP_IJK, ENDDO_LOOP_IJK defined in config.fh
+#include "config.fh"
+
+
!--------------------------------------------------------------------------------------------
!
! crust/mantle region
@@ -67,12 +72,12 @@
! memory variables for attenuation
! memory variables R_ij are stored at the local rather than global level
! to allow for optimization of cache access by compiler
- real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUATION) :: R_xx,R_yy,R_xy,R_xz,R_yz
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_CRUST_MANTLE_ATTENUATION) :: R_xx,R_yy,R_xy,R_xz,R_yz
! variable sized array variables
integer :: vx,vy,vz,vnspec
- real(kind=CUSTOM_REAL), dimension(N_SLS,vx,vy,vz,vnspec) :: factor_common
+ real(kind=CUSTOM_REAL), dimension(vx,vy,vz,N_SLS,vnspec) :: factor_common
real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval,betaval,gammaval
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ANISO_MANTLE) :: c44store
@@ -81,7 +86,7 @@
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: &
epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,5) :: epsilondev_loc
! local parameters
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: factor_common_c44_muv
@@ -89,6 +94,8 @@
#ifdef FORCE_VECTORIZATION
integer :: ijk
+#else
+ integer :: i,j,k
#endif
! use Runge-Kutta scheme to march in time
@@ -97,87 +104,65 @@
! IMPROVE we use mu_v here even if there is some anisotropy
! IMPROVE we should probably use an average value instead
-#ifdef FORCE_VECTORIZATION
do i_SLS = 1,N_SLS
+
! reformatted R_memory to handle large factor_common and reduced [alpha,beta,gamma]val
if( ATTENUATION_3D_VAL .or. ATTENUATION_1D_WITH_3D_STORAGE_VAL ) then
+
if(ANISOTROPIC_3D_MANTLE_VAL) then
- do ijk=1,NGLLCUBE
- factor_common_c44_muv(ijk,1,1) = factor_common(i_SLS,ijk,1,1,ispec) * c44store(ijk,1,1,ispec)
- enddo
- else
- do ijk=1,NGLLCUBE
- factor_common_c44_muv(ijk,1,1) = factor_common(i_SLS,ijk,1,1,ispec) * muvstore(ijk,1,1,ispec)
- enddo
- endif
- else
- if(ANISOTROPIC_3D_MANTLE_VAL) then
- do ijk=1,NGLLCUBE
- factor_common_c44_muv(ijk,1,1) = factor_common(i_SLS,1,1,1,ispec) * c44store(ijk,1,1,ispec)
- enddo
- else
- do ijk=1,NGLLCUBE
- factor_common_c44_muv(ijk,1,1) = factor_common(i_SLS,1,1,1,ispec) * muvstore(ijk,1,1,ispec)
- enddo
- endif
- endif
- do ijk=1,NGLLCUBE
- R_xx(i_SLS,ijk,1,1,ispec) = alphaval(i_SLS) * R_xx(i_SLS,ijk,1,1,ispec) + factor_common_c44_muv(ijk,1,1) * &
- (betaval(i_SLS) * epsilondev_xx(ijk,1,1,ispec) + gammaval(i_SLS) * epsilondev_loc(1,ijk,1,1))
+ DO_LOOP_IJK
+ factor_common_c44_muv(INDEX_IJK) = factor_common(INDEX_IJK,i_SLS,ispec) * c44store(INDEX_IJK,ispec)
+ ENDDO_LOOP_IJK
- R_yy(i_SLS,ijk,1,1,ispec) = alphaval(i_SLS) * R_yy(i_SLS,ijk,1,1,ispec) + factor_common_c44_muv(ijk,1,1) * &
- (betaval(i_SLS) * epsilondev_yy(ijk,1,1,ispec) + gammaval(i_SLS) * epsilondev_loc(2,ijk,1,1))
+ else
- R_xy(i_SLS,ijk,1,1,ispec) = alphaval(i_SLS) * R_xy(i_SLS,ijk,1,1,ispec) + factor_common_c44_muv(ijk,1,1) * &
- (betaval(i_SLS) * epsilondev_xy(ijk,1,1,ispec) + gammaval(i_SLS) * epsilondev_loc(3,ijk,1,1))
+ DO_LOOP_IJK
+ factor_common_c44_muv(INDEX_IJK) = factor_common(INDEX_IJK,i_SLS,ispec) * muvstore(INDEX_IJK,ispec)
+ ENDDO_LOOP_IJK
- R_xz(i_SLS,ijk,1,1,ispec) = alphaval(i_SLS) * R_xz(i_SLS,ijk,1,1,ispec) + factor_common_c44_muv(ijk,1,1) * &
- (betaval(i_SLS) * epsilondev_xz(ijk,1,1,ispec) + gammaval(i_SLS) * epsilondev_loc(4,ijk,1,1))
+ endif
- R_yz(i_SLS,ijk,1,1,ispec) = alphaval(i_SLS) * R_yz(i_SLS,ijk,1,1,ispec) + factor_common_c44_muv(ijk,1,1) * &
- (betaval(i_SLS) * epsilondev_yz(ijk,1,1,ispec) + gammaval(i_SLS) * epsilondev_loc(5,ijk,1,1))
- enddo
- enddo ! i_SLS
+ else
-#else
-
- do i_SLS = 1,N_SLS
- ! reformatted R_memory to handle large factor_common and reduced [alpha,beta,gamma]val
- if( ATTENUATION_3D_VAL .or. ATTENUATION_1D_WITH_3D_STORAGE_VAL ) then
if(ANISOTROPIC_3D_MANTLE_VAL) then
- factor_common_c44_muv(:,:,:) = factor_common(i_SLS,:,:,:,ispec) * c44store(:,:,:,ispec)
+
+ DO_LOOP_IJK
+ factor_common_c44_muv(INDEX_IJK) = factor_common(1,1,1,i_SLS,ispec) * c44store(INDEX_IJK,ispec)
+ ENDDO_LOOP_IJK
+
else
- factor_common_c44_muv(:,:,:) = factor_common(i_SLS,:,:,:,ispec) * muvstore(:,:,:,ispec)
+
+ DO_LOOP_IJK
+ factor_common_c44_muv(INDEX_IJK) = factor_common(1,1,1,i_SLS,ispec) * muvstore(INDEX_IJK,ispec)
+ ENDDO_LOOP_IJK
+
endif
- else
- if(ANISOTROPIC_3D_MANTLE_VAL) then
- factor_common_c44_muv(:,:,:) = factor_common(i_SLS,1,1,1,ispec) * c44store(:,:,:,ispec)
- else
- factor_common_c44_muv(:,:,:) = factor_common(i_SLS,1,1,1,ispec) * muvstore(:,:,:,ispec)
- endif
endif
- R_xx(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_xx(i_SLS,:,:,:,ispec) + factor_common_c44_muv(:,:,:) * &
- (betaval(i_SLS) * epsilondev_xx(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(1,:,:,:))
+ ! updates memory variables
+ DO_LOOP_IJK
- R_yy(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_yy(i_SLS,:,:,:,ispec) + factor_common_c44_muv(:,:,:) * &
- (betaval(i_SLS) * epsilondev_yy(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(2,:,:,:))
+ R_xx(INDEX_IJK,i_SLS,ispec) = alphaval(i_SLS) * R_xx(INDEX_IJK,i_SLS,ispec) + factor_common_c44_muv(INDEX_IJK) * &
+ (betaval(i_SLS) * epsilondev_xx(INDEX_IJK,ispec) + gammaval(i_SLS) * epsilondev_loc(INDEX_IJK,1))
- R_xy(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_xy(i_SLS,:,:,:,ispec) + factor_common_c44_muv(:,:,:) * &
- (betaval(i_SLS) * epsilondev_xy(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(3,:,:,:))
+ R_yy(INDEX_IJK,i_SLS,ispec) = alphaval(i_SLS) * R_yy(INDEX_IJK,i_SLS,ispec) + factor_common_c44_muv(INDEX_IJK) * &
+ (betaval(i_SLS) * epsilondev_yy(INDEX_IJK,ispec) + gammaval(i_SLS) * epsilondev_loc(INDEX_IJK,2))
- R_xz(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_xz(i_SLS,:,:,:,ispec) + factor_common_c44_muv(:,:,:) * &
- (betaval(i_SLS) * epsilondev_xz(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(4,:,:,:))
+ R_xy(INDEX_IJK,i_SLS,ispec) = alphaval(i_SLS) * R_xy(INDEX_IJK,i_SLS,ispec) + factor_common_c44_muv(INDEX_IJK) * &
+ (betaval(i_SLS) * epsilondev_xy(INDEX_IJK,ispec) + gammaval(i_SLS) * epsilondev_loc(INDEX_IJK,3))
- R_yz(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_yz(i_SLS,:,:,:,ispec) + factor_common_c44_muv(:,:,:) * &
- (betaval(i_SLS) * epsilondev_yz(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(5,:,:,:))
+ R_xz(INDEX_IJK,i_SLS,ispec) = alphaval(i_SLS) * R_xz(INDEX_IJK,i_SLS,ispec) + factor_common_c44_muv(INDEX_IJK) * &
+ (betaval(i_SLS) * epsilondev_xz(INDEX_IJK,ispec) + gammaval(i_SLS) * epsilondev_loc(INDEX_IJK,4))
- enddo ! i_SLS
+ R_yz(INDEX_IJK,i_SLS,ispec) = alphaval(i_SLS) * R_yz(INDEX_IJK,i_SLS,ispec) + factor_common_c44_muv(INDEX_IJK) * &
+ (betaval(i_SLS) * epsilondev_yz(INDEX_IJK,ispec) + gammaval(i_SLS) * epsilondev_loc(INDEX_IJK,5))
-#endif
+ ENDDO_LOOP_IJK
+ enddo ! i_SLS
+
end subroutine compute_element_att_memory_cm
@@ -189,7 +174,6 @@
R_xx_lddrk,R_yy_lddrk,R_xy_lddrk,R_xz_lddrk,R_yz_lddrk, &
vx,vy,vz,vnspec,factor_common, &
c44store,muvstore, &
-! epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz, &
epsilondev_loc, &
deltat)
! crust mantle
@@ -221,30 +205,33 @@
! memory variables for attenuation
! memory variables R_ij are stored at the local rather than global level
! to allow for optimization of cache access by compiler
- real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUATION) :: R_xx,R_yy,R_xy,R_xz,R_yz
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_CRUST_MANTLE_ATTENUATION) :: R_xx,R_yy,R_xy,R_xz,R_yz
- real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUATION) :: &
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_CRUST_MANTLE_ATTENUATION) :: &
R_xx_lddrk,R_yy_lddrk,R_xy_lddrk,R_xz_lddrk,R_yz_lddrk
! variable sized array variables
integer :: vx,vy,vz,vnspec
- real(kind=CUSTOM_REAL), dimension(N_SLS,vx,vy,vz,vnspec) :: factor_common
+ real(kind=CUSTOM_REAL), dimension(vx,vy,vz,N_SLS,vnspec) :: factor_common
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ANISO_MANTLE) :: c44store
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ISO_MANTLE) :: muvstore
-! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: &
-! epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,5) :: epsilondev_loc
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
-
real(kind=CUSTOM_REAL) :: deltat
-! local parameters
+ ! local parameters
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: factor_common_c44_muv
integer :: i_SLS
+#ifdef FORCE_VECTORIZATION
+ integer :: ijk
+#else
+ integer :: i,j,k
+#endif
+
! use Runge-Kutta scheme to march in time
! get coefficients for that standard linear solid
@@ -256,44 +243,61 @@
! reformatted R_memory to handle large factor_common and reduced [alpha,beta,gamma]val
if( ATTENUATION_3D_VAL .or. ATTENUATION_1D_WITH_3D_STORAGE_VAL ) then
if(ANISOTROPIC_3D_MANTLE_VAL) then
- factor_common_c44_muv(:,:,:) = factor_common(i_SLS,:,:,:,ispec) * c44store(:,:,:,ispec)
+
+ DO_LOOP_IJK
+ factor_common_c44_muv(INDEX_IJK) = factor_common(INDEX_IJK,i_SLS,ispec) * c44store(INDEX_IJK,ispec)
+ ENDDO_LOOP_IJK
else
- factor_common_c44_muv(:,:,:) = factor_common(i_SLS,:,:,:,ispec) * muvstore(:,:,:,ispec)
+ DO_LOOP_IJK
+ factor_common_c44_muv(INDEX_IJK) = factor_common(INDEX_IJK,i_SLS,ispec) * muvstore(INDEX_IJK,ispec)
+ ENDDO_LOOP_IJK
endif
+
else
+
if(ANISOTROPIC_3D_MANTLE_VAL) then
- factor_common_c44_muv(:,:,:) = factor_common(i_SLS,1,1,1,ispec) * c44store(:,:,:,ispec)
+ DO_LOOP_IJK
+ factor_common_c44_muv(INDEX_IJK) = factor_common(1,1,1,i_SLS,ispec) * c44store(INDEX_IJK,ispec)
+ ENDDO_LOOP_IJK
else
- factor_common_c44_muv(:,:,:) = factor_common(i_SLS,1,1,1,ispec) * muvstore(:,:,:,ispec)
+ DO_LOOP_IJK
+ factor_common_c44_muv(INDEX_IJK) = factor_common(1,1,1,i_SLS,ispec) * muvstore(INDEX_IJK,ispec)
+ ENDDO_LOOP_IJK
endif
+
endif
- R_xx_lddrk(i_SLS,:,:,:,ispec) = ALPHA_LDDRK(istage) * R_xx_lddrk(i_SLS,:,:,:,ispec) &
- + deltat * ( factor_common_c44_muv(:,:,:) * epsilondev_loc(1,:,:,:) &
- - R_xx(i_SLS,:,:,:,ispec)*(1._CUSTOM_REAL/tau_sigma_CUSTOM_REAL(i_SLS)) )
+ ! updates memory variables
+ DO_LOOP_IJK
- R_yy_lddrk(i_SLS,:,:,:,ispec) = ALPHA_LDDRK(istage) * R_yy_lddrk(i_SLS,:,:,:,ispec) &
- + deltat * ( factor_common_c44_muv(:,:,:) * epsilondev_loc(2,:,:,:) &
- - R_yy(i_SLS,:,:,:,ispec)*(1._CUSTOM_REAL/tau_sigma_CUSTOM_REAL(i_SLS)) )
+ R_xx_lddrk(INDEX_IJK,i_SLS,ispec) = ALPHA_LDDRK(istage) * R_xx_lddrk(INDEX_IJK,i_SLS,ispec) &
+ + deltat * ( factor_common_c44_muv(INDEX_IJK) * epsilondev_loc(INDEX_IJK,1) &
+ - R_xx(INDEX_IJK,i_SLS,ispec)*(1._CUSTOM_REAL/tau_sigma_CUSTOM_REAL(i_SLS)) )
- R_xy_lddrk(i_SLS,:,:,:,ispec) = ALPHA_LDDRK(istage) * R_xy_lddrk(i_SLS,:,:,:,ispec) &
- + deltat * ( factor_common_c44_muv(:,:,:) * epsilondev_loc(3,:,:,:) &
- - R_xy(i_SLS,:,:,:,ispec)*(1._CUSTOM_REAL/tau_sigma_CUSTOM_REAL(i_SLS)) )
+ R_yy_lddrk(INDEX_IJK,i_SLS,ispec) = ALPHA_LDDRK(istage) * R_yy_lddrk(INDEX_IJK,i_SLS,ispec) &
+ + deltat * ( factor_common_c44_muv(INDEX_IJK) * epsilondev_loc(INDEX_IJK,2) &
+ - R_yy(INDEX_IJK,i_SLS,ispec)*(1._CUSTOM_REAL/tau_sigma_CUSTOM_REAL(i_SLS)) )
- R_xz_lddrk(i_SLS,:,:,:,ispec) = ALPHA_LDDRK(istage) * R_xz_lddrk(i_SLS,:,:,:,ispec) &
- + deltat * ( factor_common_c44_muv(:,:,:) * epsilondev_loc(4,:,:,:) &
- - R_xz(i_SLS,:,:,:,ispec)*(1._CUSTOM_REAL/tau_sigma_CUSTOM_REAL(i_SLS)) )
+ R_xy_lddrk(INDEX_IJK,i_SLS,ispec) = ALPHA_LDDRK(istage) * R_xy_lddrk(INDEX_IJK,i_SLS,ispec) &
+ + deltat * ( factor_common_c44_muv(INDEX_IJK) * epsilondev_loc(INDEX_IJK,3) &
+ - R_xy(INDEX_IJK,i_SLS,ispec)*(1._CUSTOM_REAL/tau_sigma_CUSTOM_REAL(i_SLS)) )
- R_yz_lddrk(i_SLS,:,:,:,ispec) = ALPHA_LDDRK(istage) * R_yz_lddrk(i_SLS,:,:,:,ispec) &
- + deltat * ( factor_common_c44_muv(:,:,:) * epsilondev_loc(5,:,:,:) &
- - R_yz(i_SLS,:,:,:,ispec)*(1._CUSTOM_REAL/tau_sigma_CUSTOM_REAL(i_SLS)) )
+ R_xz_lddrk(INDEX_IJK,i_SLS,ispec) = ALPHA_LDDRK(istage) * R_xz_lddrk(INDEX_IJK,i_SLS,ispec) &
+ + deltat * ( factor_common_c44_muv(INDEX_IJK) * epsilondev_loc(INDEX_IJK,4) &
+ - R_xz(INDEX_IJK,i_SLS,ispec)*(1._CUSTOM_REAL/tau_sigma_CUSTOM_REAL(i_SLS)) )
- R_xx(i_SLS,:,:,:,ispec) = R_xx(i_SLS,:,:,:,ispec) + BETA_LDDRK(istage) * R_xx_lddrk(i_SLS,:,:,:,ispec)
- R_yy(i_SLS,:,:,:,ispec) = R_yy(i_SLS,:,:,:,ispec) + BETA_LDDRK(istage) * R_yy_lddrk(i_SLS,:,:,:,ispec)
- R_xy(i_SLS,:,:,:,ispec) = R_xy(i_SLS,:,:,:,ispec) + BETA_LDDRK(istage) * R_xy_lddrk(i_SLS,:,:,:,ispec)
- R_xz(i_SLS,:,:,:,ispec) = R_xz(i_SLS,:,:,:,ispec) + BETA_LDDRK(istage) * R_xz_lddrk(i_SLS,:,:,:,ispec)
- R_yz(i_SLS,:,:,:,ispec) = R_yz(i_SLS,:,:,:,ispec) + BETA_LDDRK(istage) * R_yz_lddrk(i_SLS,:,:,:,ispec)
+ R_yz_lddrk(INDEX_IJK,i_SLS,ispec) = ALPHA_LDDRK(istage) * R_yz_lddrk(INDEX_IJK,i_SLS,ispec) &
+ + deltat * ( factor_common_c44_muv(INDEX_IJK) * epsilondev_loc(INDEX_IJK,5) &
+ - R_yz(INDEX_IJK,i_SLS,ispec)*(1._CUSTOM_REAL/tau_sigma_CUSTOM_REAL(i_SLS)) )
+ R_xx(INDEX_IJK,i_SLS,ispec) = R_xx(INDEX_IJK,i_SLS,ispec) + BETA_LDDRK(istage) * R_xx_lddrk(INDEX_IJK,i_SLS,ispec)
+ R_yy(INDEX_IJK,i_SLS,ispec) = R_yy(INDEX_IJK,i_SLS,ispec) + BETA_LDDRK(istage) * R_yy_lddrk(INDEX_IJK,i_SLS,ispec)
+ R_xy(INDEX_IJK,i_SLS,ispec) = R_xy(INDEX_IJK,i_SLS,ispec) + BETA_LDDRK(istage) * R_xy_lddrk(INDEX_IJK,i_SLS,ispec)
+ R_xz(INDEX_IJK,i_SLS,ispec) = R_xz(INDEX_IJK,i_SLS,ispec) + BETA_LDDRK(istage) * R_xz_lddrk(INDEX_IJK,i_SLS,ispec)
+ R_yz(INDEX_IJK,i_SLS,ispec) = R_yz(INDEX_IJK,i_SLS,ispec) + BETA_LDDRK(istage) * R_yz_lddrk(INDEX_IJK,i_SLS,ispec)
+
+ ENDDO_LOOP_IJK
+
enddo ! i_SLS
end subroutine compute_element_att_memory_cm_lddrk
@@ -337,13 +341,12 @@
! element id
integer :: ispec
- real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ATTENUATION) :: &
- R_xx,R_yy,R_xy,R_xz,R_yz
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_INNER_CORE_ATTENUATION) :: R_xx,R_yy,R_xy,R_xz,R_yz
! variable sized array variables
integer :: vx,vy,vz,vnspec
- real(kind=CUSTOM_REAL), dimension(N_SLS,vx,vy,vz,vnspec) :: factor_common
+ real(kind=CUSTOM_REAL), dimension(vx,vy,vz,N_SLS,vnspec) :: factor_common
real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval,betaval,gammaval
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: muvstore
@@ -352,7 +355,7 @@
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: &
epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,5) :: epsilondev_loc
! local parameters
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: factor_common_use
@@ -361,6 +364,8 @@
#ifdef FORCE_VECTORIZATION
integer :: ijk
+#else
+ integer :: i,j,k
#endif
! use Runge-Kutta scheme to march in time
@@ -372,66 +377,45 @@
! note: epsilondev_loc is calculated based on displ( n + 1 ), thus corresponds to strain at time (n + 1)
! epsilondev_xx,.. are stored from previous step, thus corresponds now to strain at time n
-#ifdef FORCE_VECTORIZATION
-
do i_SLS = 1,N_SLS
+
! reformatted R_memory to handle large factor_common and reduced [alpha,beta,gamma]val
if( ATTENUATION_3D_VAL .or. ATTENUATION_1D_WITH_3D_STORAGE_VAL ) then
- do ijk=1,NGLLCUBE
- factor_common_use(ijk,1,1) = factor_common(i_SLS,ijk,1,1,ispec) * muvstore(ijk,1,1,ispec)
- enddo
- else
- do ijk=1,NGLLCUBE
- factor_common_use(ijk,1,1) = factor_common(i_SLS,1,1,1,ispec) * muvstore(ijk,1,1,ispec)
- enddo
- endif
- do ijk=1,NGLLCUBE
- R_xx(i_SLS,ijk,1,1,ispec) = alphaval(i_SLS) * R_xx(i_SLS,ijk,1,1,ispec) + factor_common_use(ijk,1,1) * &
- (betaval(i_SLS) * epsilondev_xx(ijk,1,1,ispec) + gammaval(i_SLS) * epsilondev_loc(1,ijk,1,1))
+ DO_LOOP_IJK
+ factor_common_use(INDEX_IJK) = factor_common(INDEX_IJK,i_SLS,ispec) * muvstore(INDEX_IJK,ispec)
+ ENDDO_LOOP_IJK
- R_yy(i_SLS,ijk,1,1,ispec) = alphaval(i_SLS) * R_yy(i_SLS,ijk,1,1,ispec) + factor_common_use(ijk,1,1) * &
- (betaval(i_SLS) * epsilondev_yy(ijk,1,1,ispec) + gammaval(i_SLS) * epsilondev_loc(2,ijk,1,1))
+ else
- R_xy(i_SLS,ijk,1,1,ispec) = alphaval(i_SLS) * R_xy(i_SLS,ijk,1,1,ispec) + factor_common_use(ijk,1,1) * &
- (betaval(i_SLS) * epsilondev_xy(ijk,1,1,ispec) + gammaval(i_SLS) * epsilondev_loc(3,ijk,1,1))
+ DO_LOOP_IJK
+ factor_common_use(INDEX_IJK) = factor_common(1,1,1,i_SLS,ispec) * muvstore(INDEX_IJK,ispec)
+ ENDDO_LOOP_IJK
- R_xz(i_SLS,ijk,1,1,ispec) = alphaval(i_SLS) * R_xz(i_SLS,ijk,1,1,ispec) + factor_common_use(ijk,1,1) * &
- (betaval(i_SLS) * epsilondev_xz(ijk,1,1,ispec) + gammaval(i_SLS) * epsilondev_loc(4,ijk,1,1))
+ endif
- R_yz(i_SLS,ijk,1,1,ispec) = alphaval(i_SLS) * R_yz(i_SLS,ijk,1,1,ispec) + factor_common_use(ijk,1,1) * &
- (betaval(i_SLS) * epsilondev_yz(ijk,1,1,ispec) + gammaval(i_SLS) * epsilondev_loc(5,ijk,1,1))
- enddo
- enddo
+ ! updates memory variables
+ DO_LOOP_IJK
-#else
+ R_xx(INDEX_IJK,i_SLS,ispec) = alphaval(i_SLS) * R_xx(INDEX_IJK,i_SLS,ispec) + factor_common_use(INDEX_IJK) * &
+ (betaval(i_SLS) * epsilondev_xx(INDEX_IJK,ispec) + gammaval(i_SLS) * epsilondev_loc(INDEX_IJK,1))
- do i_SLS = 1,N_SLS
- ! reformatted R_memory to handle large factor_common and reduced [alpha,beta,gamma]val
- if( ATTENUATION_3D_VAL .or. ATTENUATION_1D_WITH_3D_STORAGE_VAL ) then
- factor_common_use(:,:,:) = factor_common(i_SLS,:,:,:,ispec) * muvstore(:,:,:,ispec)
- else
- factor_common_use(:,:,:) = factor_common(i_SLS,1,1,1,ispec) * muvstore(:,:,:,ispec)
- endif
+ R_yy(INDEX_IJK,i_SLS,ispec) = alphaval(i_SLS) * R_yy(INDEX_IJK,i_SLS,ispec) + factor_common_use(INDEX_IJK) * &
+ (betaval(i_SLS) * epsilondev_yy(INDEX_IJK,ispec) + gammaval(i_SLS) * epsilondev_loc(INDEX_IJK,2))
- R_xx(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_xx(i_SLS,:,:,:,ispec) + factor_common_use(:,:,:) * &
- (betaval(i_SLS) * epsilondev_xx(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(1,:,:,:))
+ R_xy(INDEX_IJK,i_SLS,ispec) = alphaval(i_SLS) * R_xy(INDEX_IJK,i_SLS,ispec) + factor_common_use(INDEX_IJK) * &
+ (betaval(i_SLS) * epsilondev_xy(INDEX_IJK,ispec) + gammaval(i_SLS) * epsilondev_loc(INDEX_IJK,3))
- R_yy(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_yy(i_SLS,:,:,:,ispec) + factor_common_use(:,:,:) * &
- (betaval(i_SLS) * epsilondev_yy(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(2,:,:,:))
+ R_xz(INDEX_IJK,i_SLS,ispec) = alphaval(i_SLS) * R_xz(INDEX_IJK,i_SLS,ispec) + factor_common_use(INDEX_IJK) * &
+ (betaval(i_SLS) * epsilondev_xz(INDEX_IJK,ispec) + gammaval(i_SLS) * epsilondev_loc(INDEX_IJK,4))
- R_xy(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_xy(i_SLS,:,:,:,ispec) + factor_common_use(:,:,:) * &
- (betaval(i_SLS) * epsilondev_xy(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(3,:,:,:))
+ R_yz(INDEX_IJK,i_SLS,ispec) = alphaval(i_SLS) * R_yz(INDEX_IJK,i_SLS,ispec) + factor_common_use(INDEX_IJK) * &
+ (betaval(i_SLS) * epsilondev_yz(INDEX_IJK,ispec) + gammaval(i_SLS) * epsilondev_loc(INDEX_IJK,5))
- R_xz(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_xz(i_SLS,:,:,:,ispec) + factor_common_use(:,:,:) * &
- (betaval(i_SLS) * epsilondev_xz(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(4,:,:,:))
+ ENDDO_LOOP_IJK
- R_yz(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_yz(i_SLS,:,:,:,ispec) + factor_common_use(:,:,:) * &
- (betaval(i_SLS) * epsilondev_yz(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(5,:,:,:))
- enddo
+ enddo ! N_SLS
-#endif
-
end subroutine compute_element_att_memory_ic
@@ -443,7 +427,6 @@
R_xx_lddrk,R_yy_lddrk,R_xy_lddrk,R_xz_lddrk,R_yz_lddrk, &
vx,vy,vz,vnspec,factor_common, &
muvstore, &
-! epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz, &
epsilondev_loc, &
deltat)
! inner core
@@ -471,32 +454,33 @@
! element id
integer :: ispec
- real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ATTENUATION) :: &
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_INNER_CORE_ATTENUATION) :: &
R_xx,R_yy,R_xy,R_xz,R_yz
- real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ATTENUATION) :: &
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_INNER_CORE_ATTENUATION) :: &
R_xx_lddrk,R_yy_lddrk,R_xy_lddrk,R_xz_lddrk,R_yz_lddrk
! variable sized array variables
integer :: vx,vy,vz,vnspec
- real(kind=CUSTOM_REAL), dimension(N_SLS,vx,vy,vz,vnspec) :: factor_common
+ real(kind=CUSTOM_REAL), dimension(vx,vy,vz,N_SLS,vnspec) :: factor_common
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: muvstore
-! real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: epsilondev
-! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: &
-! epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,5) :: epsilondev_loc
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
-
real(kind=CUSTOM_REAL) :: deltat
-! local parameters
+ ! local parameters
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: factor_common_use
integer :: i_SLS
+
+#ifdef FORCE_VECTORIZATION
+ integer :: ijk
+#else
integer :: i,j,k
+#endif
! use Runge-Kutta scheme to march in time
@@ -511,43 +495,46 @@
! reformatted R_memory to handle large factor_common and reduced [alpha,beta,gamma]val
if( ATTENUATION_3D_VAL .or. ATTENUATION_1D_WITH_3D_STORAGE_VAL ) then
- factor_common_use(:,:,:) = factor_common(i_SLS,:,:,:,ispec) * muvstore(:,:,:,ispec)
+ DO_LOOP_IJK
+ factor_common_use(INDEX_IJK) = factor_common(INDEX_IJK,i_SLS,ispec) * muvstore(INDEX_IJK,ispec)
+ ENDDO_LOOP_IJK
else
- factor_common_use(:,:,:) = factor_common(i_SLS,1,1,1,ispec) * muvstore(:,:,:,ispec)
+ DO_LOOP_IJK
+ factor_common_use(INDEX_IJK) = factor_common(1,1,1,i_SLS,ispec) * muvstore(INDEX_IJK,ispec)
+ ENDDO_LOOP_IJK
endif
- do k = 1,NGLLZ
- do j = 1,NGLLY
- do i = 1,NGLLX
- R_xx_lddrk(i_SLS,i,j,k,ispec) = ALPHA_LDDRK(istage) * R_xx_lddrk(i_SLS,i,j,k,ispec) &
- + deltat * ( factor_common_use(i,j,k)*epsilondev_loc(1,i,j,k) &
- - R_xx(i_SLS,i,j,k,ispec)*(1.0_CUSTOM_REAL/tau_sigma_CUSTOM_REAL(i_SLS)) )
+ ! updates memory variables
+ DO_LOOP_IJK
- R_yy_lddrk(i_SLS,i,j,k,ispec) = ALPHA_LDDRK(istage) * R_yy_lddrk(i_SLS,i,j,k,ispec) &
- + deltat * ( factor_common_use(i,j,k)*epsilondev_loc(2,i,j,k) &
- - R_yy(i_SLS,i,j,k,ispec)*(1.0_CUSTOM_REAL/tau_sigma_CUSTOM_REAL(i_SLS)) )
+ R_xx_lddrk(INDEX_IJK,i_SLS,ispec) = ALPHA_LDDRK(istage) * R_xx_lddrk(INDEX_IJK,i_SLS,ispec) &
+ + deltat * ( factor_common_use(INDEX_IJK)*epsilondev_loc(INDEX_IJK,1) &
+ - R_xx(INDEX_IJK,i_SLS,ispec)*(1.0_CUSTOM_REAL/tau_sigma_CUSTOM_REAL(i_SLS)) )
- R_xy_lddrk(i_SLS,i,j,k,ispec) = ALPHA_LDDRK(istage) * R_xy_lddrk(i_SLS,i,j,k,ispec) &
- + deltat * ( factor_common_use(i,j,k)*epsilondev_loc(3,i,j,k) &
- - R_xy(i_SLS,i,j,k,ispec)*(1.0_CUSTOM_REAL/tau_sigma_CUSTOM_REAL(i_SLS)) )
+ R_yy_lddrk(INDEX_IJK,i_SLS,ispec) = ALPHA_LDDRK(istage) * R_yy_lddrk(INDEX_IJK,i_SLS,ispec) &
+ + deltat * ( factor_common_use(INDEX_IJK)*epsilondev_loc(INDEX_IJK,2) &
+ - R_yy(INDEX_IJK,i_SLS,ispec)*(1.0_CUSTOM_REAL/tau_sigma_CUSTOM_REAL(i_SLS)) )
- R_xz_lddrk(i_SLS,i,j,k,ispec) = ALPHA_LDDRK(istage) * R_xz_lddrk(i_SLS,i,j,k,ispec) &
- + deltat * ( factor_common_use(i,j,k)*epsilondev_loc(4,i,j,k) &
- - R_xz(i_SLS,i,j,k,ispec)*(1.0_CUSTOM_REAL/tau_sigma_CUSTOM_REAL(i_SLS)) )
+ R_xy_lddrk(INDEX_IJK,i_SLS,ispec) = ALPHA_LDDRK(istage) * R_xy_lddrk(INDEX_IJK,i_SLS,ispec) &
+ + deltat * ( factor_common_use(INDEX_IJK)*epsilondev_loc(INDEX_IJK,3) &
+ - R_xy(INDEX_IJK,i_SLS,ispec)*(1.0_CUSTOM_REAL/tau_sigma_CUSTOM_REAL(i_SLS)) )
- R_yz_lddrk(i_SLS,i,j,k,ispec) = ALPHA_LDDRK(istage) * R_yz_lddrk(i_SLS,i,j,k,ispec) &
- + deltat * ( factor_common_use(i,j,k)*epsilondev_loc(5,i,j,k) &
- - R_yz(i_SLS,i,j,k,ispec)*(1.0_CUSTOM_REAL/tau_sigma_CUSTOM_REAL(i_SLS)) )
+ R_xz_lddrk(INDEX_IJK,i_SLS,ispec) = ALPHA_LDDRK(istage) * R_xz_lddrk(INDEX_IJK,i_SLS,ispec) &
+ + deltat * ( factor_common_use(INDEX_IJK)*epsilondev_loc(INDEX_IJK,4) &
+ - R_xz(INDEX_IJK,i_SLS,ispec)*(1.0_CUSTOM_REAL/tau_sigma_CUSTOM_REAL(i_SLS)) )
- R_xx(i_SLS,i,j,k,ispec) = R_xx(i_SLS,i,j,k,ispec) + BETA_LDDRK(istage) * R_xx_lddrk(i_SLS,i,j,k,ispec)
- R_yy(i_SLS,i,j,k,ispec) = R_yy(i_SLS,i,j,k,ispec) + BETA_LDDRK(istage) * R_yy_lddrk(i_SLS,i,j,k,ispec)
- R_xy(i_SLS,i,j,k,ispec) = R_xy(i_SLS,i,j,k,ispec) + BETA_LDDRK(istage) * R_xy_lddrk(i_SLS,i,j,k,ispec)
- R_xz(i_SLS,i,j,k,ispec) = R_xz(i_SLS,i,j,k,ispec) + BETA_LDDRK(istage) * R_xz_lddrk(i_SLS,i,j,k,ispec)
- R_yz(i_SLS,i,j,k,ispec) = R_yz(i_SLS,i,j,k,ispec) + BETA_LDDRK(istage) * R_yz_lddrk(i_SLS,i,j,k,ispec)
- enddo
- enddo
- enddo
+ R_yz_lddrk(INDEX_IJK,i_SLS,ispec) = ALPHA_LDDRK(istage) * R_yz_lddrk(INDEX_IJK,i_SLS,ispec) &
+ + deltat * ( factor_common_use(INDEX_IJK)*epsilondev_loc(INDEX_IJK,5) &
+ - R_yz(INDEX_IJK,i_SLS,ispec)*(1.0_CUSTOM_REAL/tau_sigma_CUSTOM_REAL(i_SLS)) )
+ R_xx(INDEX_IJK,i_SLS,ispec) = R_xx(INDEX_IJK,i_SLS,ispec) + BETA_LDDRK(istage) * R_xx_lddrk(INDEX_IJK,i_SLS,ispec)
+ R_yy(INDEX_IJK,i_SLS,ispec) = R_yy(INDEX_IJK,i_SLS,ispec) + BETA_LDDRK(istage) * R_yy_lddrk(INDEX_IJK,i_SLS,ispec)
+ R_xy(INDEX_IJK,i_SLS,ispec) = R_xy(INDEX_IJK,i_SLS,ispec) + BETA_LDDRK(istage) * R_xy_lddrk(INDEX_IJK,i_SLS,ispec)
+ R_xz(INDEX_IJK,i_SLS,ispec) = R_xz(INDEX_IJK,i_SLS,ispec) + BETA_LDDRK(istage) * R_xz_lddrk(INDEX_IJK,i_SLS,ispec)
+ R_yz(INDEX_IJK,i_SLS,ispec) = R_yz(INDEX_IJK,i_SLS,ispec) + BETA_LDDRK(istage) * R_yz_lddrk(INDEX_IJK,i_SLS,ispec)
+
+ ENDDO_LOOP_IJK
+
enddo
end subroutine compute_element_att_memory_ic_lddrk
@@ -637,9 +624,9 @@
!
! ! reformatted R_memory to handle large factor_common and reduced [alpha,beta,gamma]val
! if( ATTENUATION_3D_VAL .or. ATTENUATION_1D_WITH_3D_STORAGE_VAL ) then
-! factor_common_c44_muv = factor_common(i_SLS,i,j,k,ispec) * c44_muv
+! factor_common_c44_muv = factor_common(i,j,k,i_SLS,ispec) * c44_muv
! else
-! factor_common_c44_muv = factor_common(i_SLS,1,1,1,ispec) * c44_muv
+! factor_common_c44_muv = factor_common(1,1,1,i_SLS,ispec) * c44_muv
! endif
!
! ! adds contributions from current strain
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_element_strain.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_element_strain.F90 2013-11-01 21:27:52 UTC (rev 22976)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_element_strain.F90 2013-11-04 08:16:42 UTC (rev 22977)
@@ -53,50 +53,27 @@
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec),intent(in) :: &
xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ),intent(out) :: epsilondev_loc
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,5),intent(out) :: epsilondev_loc
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ),intent(out) :: eps_trace_over_3_loc
-! local variable
+ ! local variable
integer :: iglob
- integer :: i,j,k
real(kind=CUSTOM_REAL) :: templ
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
+
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc,dummyy_loc,dummyz_loc
- real(kind=CUSTOM_REAL), dimension(NGLLX,m2) :: B1_m1_m2_5points,B2_m1_m2_5points,B3_m1_m2_5points
- equivalence(dummyx_loc,B1_m1_m2_5points)
- equivalence(dummyy_loc,B2_m1_m2_5points)
- equivalence(dummyz_loc,B3_m1_m2_5points)
+ real(kind=CUSTOM_REAL) :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl
+ real(kind=CUSTOM_REAL) :: duxdxl,duydyl,duzdzl,duxdyl,duydxl,duzdxl,duxdzl,duzdyl,duydzl, &
+ duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl, &
+ duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
- real(kind=CUSTOM_REAL), dimension(m1,m2) :: C1_m1_m2_5points,C2_m1_m2_5points,C3_m1_m2_5points
-
- equivalence(tempx1,C1_m1_m2_5points)
- equivalence(tempy1,C2_m1_m2_5points)
- equivalence(tempz1,C3_m1_m2_5points)
-
- real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
- C1_mxm_m2_m1_5points,C2_mxm_m2_m1_5points,C3_mxm_m2_m1_5points
-
- equivalence(tempx3,C1_mxm_m2_m1_5points)
- equivalence(tempy3,C2_mxm_m2_m1_5points)
- equivalence(tempz3,C3_mxm_m2_m1_5points)
-
- real(kind=CUSTOM_REAL), dimension(m2,NGLLX) :: &
- A1_mxm_m2_m1_5points,A2_mxm_m2_m1_5points,A3_mxm_m2_m1_5points
-
- equivalence(dummyx_loc,A1_mxm_m2_m1_5points)
- equivalence(dummyy_loc,A2_mxm_m2_m1_5points)
- equivalence(dummyz_loc,A3_mxm_m2_m1_5points)
-
- real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
- real(kind=CUSTOM_REAL) duxdxl,duydyl,duzdzl,duxdyl,duydxl,duzdxl,duxdzl,duzdyl,duydzl,&
- duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl,&
- duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
-
#ifdef FORCE_VECTORIZATION
integer :: ijk
+#else
+ integer :: i,j,k
#endif
DO_LOOP_IJK
@@ -109,75 +86,13 @@
ENDDO_LOOP_IJK
! deville optimizations
- do j=1,m2
- do i=1,m1
- C1_m1_m2_5points(i,j) = hprime_xx(i,1)*B1_m1_m2_5points(1,j) + &
- hprime_xx(i,2)*B1_m1_m2_5points(2,j) + &
- hprime_xx(i,3)*B1_m1_m2_5points(3,j) + &
- hprime_xx(i,4)*B1_m1_m2_5points(4,j) + &
- hprime_xx(i,5)*B1_m1_m2_5points(5,j)
+ ! computes 1. matrix multiplication for tempx1,..
+ call mxm5_3comp_singleA(hprime_xx,m1,dummyx_loc,dummyy_loc,dummyz_loc,tempx1,tempy1,tempz1,m2)
+ ! computes 2. matrix multiplication for tempx2,..
+ call mxm5_3comp_3dmat_singleB(dummyx_loc,dummyy_loc,dummyz_loc,m1,hprime_xxT,m1,tempx2,tempy2,tempz2,NGLLX)
+ ! computes 3. matrix multiplication for tempx3,..
+ call mxm5_3comp_singleB(dummyx_loc,dummyy_loc,dummyz_loc,m2,hprime_xxT,tempx3,tempy3,tempz3,m1)
- C2_m1_m2_5points(i,j) = hprime_xx(i,1)*B2_m1_m2_5points(1,j) + &
- hprime_xx(i,2)*B2_m1_m2_5points(2,j) + &
- hprime_xx(i,3)*B2_m1_m2_5points(3,j) + &
- hprime_xx(i,4)*B2_m1_m2_5points(4,j) + &
- hprime_xx(i,5)*B2_m1_m2_5points(5,j)
-
- C3_m1_m2_5points(i,j) = hprime_xx(i,1)*B3_m1_m2_5points(1,j) + &
- hprime_xx(i,2)*B3_m1_m2_5points(2,j) + &
- hprime_xx(i,3)*B3_m1_m2_5points(3,j) + &
- hprime_xx(i,4)*B3_m1_m2_5points(4,j) + &
- hprime_xx(i,5)*B3_m1_m2_5points(5,j)
- enddo
- enddo
-
- do j=1,m1
- do i=1,m1
- ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
- do k = 1,NGLLX
- tempx2(i,j,k) = dummyx_loc(i,1,k)*hprime_xxT(1,j) + &
- dummyx_loc(i,2,k)*hprime_xxT(2,j) + &
- dummyx_loc(i,3,k)*hprime_xxT(3,j) + &
- dummyx_loc(i,4,k)*hprime_xxT(4,j) + &
- dummyx_loc(i,5,k)*hprime_xxT(5,j)
-
- tempy2(i,j,k) = dummyy_loc(i,1,k)*hprime_xxT(1,j) + &
- dummyy_loc(i,2,k)*hprime_xxT(2,j) + &
- dummyy_loc(i,3,k)*hprime_xxT(3,j) + &
- dummyy_loc(i,4,k)*hprime_xxT(4,j) + &
- dummyy_loc(i,5,k)*hprime_xxT(5,j)
-
- tempz2(i,j,k) = dummyz_loc(i,1,k)*hprime_xxT(1,j) + &
- dummyz_loc(i,2,k)*hprime_xxT(2,j) + &
- dummyz_loc(i,3,k)*hprime_xxT(3,j) + &
- dummyz_loc(i,4,k)*hprime_xxT(4,j) + &
- dummyz_loc(i,5,k)*hprime_xxT(5,j)
- enddo
- enddo
- enddo
-
- do j=1,m1
- do i=1,m2
- C1_mxm_m2_m1_5points(i,j) = A1_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
- A1_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
- A1_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
- A1_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
- A1_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
-
- C2_mxm_m2_m1_5points(i,j) = A2_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
- A2_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
- A2_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
- A2_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
- A2_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
-
- C3_mxm_m2_m1_5points(i,j) = A3_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
- A3_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
- A3_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
- A3_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
- A3_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
- enddo
- enddo
-
DO_LOOP_IJK
! get derivatives of ux, uy and uz with respect to x, y and z
@@ -191,11 +106,6 @@
gammayl = gammay(INDEX_IJK,ispec)
gammazl = gammaz(INDEX_IJK,ispec)
- ! compute the jacobian
- jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
- - xiyl*(etaxl*gammazl-etazl*gammaxl) &
- + xizl*(etaxl*gammayl-etayl*gammaxl))
-
duxdxl = xixl*tempx1(INDEX_IJK) + etaxl*tempx2(INDEX_IJK) + gammaxl*tempx3(INDEX_IJK)
duxdyl = xiyl*tempx1(INDEX_IJK) + etayl*tempx2(INDEX_IJK) + gammayl*tempx3(INDEX_IJK)
duxdzl = xizl*tempx1(INDEX_IJK) + etazl*tempx2(INDEX_IJK) + gammazl*tempx3(INDEX_IJK)
@@ -219,14 +129,164 @@
! strains
templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
eps_trace_over_3_loc(INDEX_IJK) = templ
- epsilondev_loc(1,INDEX_IJK) = duxdxl - templ
- epsilondev_loc(2,INDEX_IJK) = duydyl - templ
- epsilondev_loc(3,INDEX_IJK) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl
- epsilondev_loc(4,INDEX_IJK) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl
- epsilondev_loc(5,INDEX_IJK) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl
+ epsilondev_loc(INDEX_IJK,1) = duxdxl - templ
+ epsilondev_loc(INDEX_IJK,2) = duydyl - templ
+ epsilondev_loc(INDEX_IJK,3) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl
+ epsilondev_loc(INDEX_IJK,4) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl
+ epsilondev_loc(INDEX_IJK,5) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl
ENDDO_LOOP_IJK
+ contains
+
+!--------------------------------------------------------------------------------------------
+!
+! matrix-matrix multiplications
+!
+! subroutines adapted from Deville, Fischer and Mund, High-order methods
+! for incompressible fluid flow, Cambridge University Press (2002),
+! pages 386 and 389 and Figure 8.3.1
+!
+!--------------------------------------------------------------------------------------------
+!
+! note: the matrix-matrix multiplications are used for very small matrices ( 5 x 5 x 5 elements);
+! thus, calling external optimized libraries for these multiplications is in general slower
+!
+! please leave the routines here to help compilers inlining the code
+
+ subroutine mxm5_3comp_singleA(A,n1,B1,B2,B3,C1,C2,C3,n3)
+
+! 3 different arrays for x/y/z-components, 2-dimensional arrays (25,5)/(5,25), same A matrix for all 3 component arrays
+
+ use constants_solver,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n3
+ real(kind=CUSTOM_REAL),dimension(n1,5) :: A
+ real(kind=CUSTOM_REAL),dimension(5,n3) :: B1,B2,B3
+ real(kind=CUSTOM_REAL),dimension(n1,n3) :: C1,C2,C3
+
+ ! local parameters
+ integer :: i,j
+
+ ! matrix-matrix multiplication
+ do j=1,n3
+ do i=1,n1
+ C1(i,j) = A(i,1) * B1(1,j) &
+ + A(i,2) * B1(2,j) &
+ + A(i,3) * B1(3,j) &
+ + A(i,4) * B1(4,j) &
+ + A(i,5) * B1(5,j)
+
+ C2(i,j) = A(i,1) * B2(1,j) &
+ + A(i,2) * B2(2,j) &
+ + A(i,3) * B2(3,j) &
+ + A(i,4) * B2(4,j) &
+ + A(i,5) * B2(5,j)
+
+ C3(i,j) = A(i,1) * B3(1,j) &
+ + A(i,2) * B3(2,j) &
+ + A(i,3) * B3(3,j) &
+ + A(i,4) * B3(4,j) &
+ + A(i,5) * B3(5,j)
+ enddo
+ enddo
+
+ end subroutine mxm5_3comp_singleA
+
+
+!--------------------------------------------------------------------------------------------
+
+ subroutine mxm5_3comp_singleB(A1,A2,A3,n1,B,C1,C2,C3,n3)
+
+! 3 different arrays for x/y/z-components, 2-dimensional arrays (25,5)/(5,25), same B matrix for all 3 component arrays
+
+ use constants_solver,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n3
+ real(kind=CUSTOM_REAL),dimension(n1,5) :: A1,A2,A3
+ real(kind=CUSTOM_REAL),dimension(5,n3) :: B
+ real(kind=CUSTOM_REAL),dimension(n1,n3) :: C1,C2,C3
+
+ ! local parameters
+ integer :: i,j
+
+ ! matrix-matrix multiplication
+ do j=1,n3
+ do i=1,n1
+ C1(i,j) = A1(i,1) * B(1,j) &
+ + A1(i,2) * B(2,j) &
+ + A1(i,3) * B(3,j) &
+ + A1(i,4) * B(4,j) &
+ + A1(i,5) * B(5,j)
+
+ C2(i,j) = A2(i,1) * B(1,j) &
+ + A2(i,2) * B(2,j) &
+ + A2(i,3) * B(3,j) &
+ + A2(i,4) * B(4,j) &
+ + A2(i,5) * B(5,j)
+
+ C3(i,j) = A3(i,1) * B(1,j) &
+ + A3(i,2) * B(2,j) &
+ + A3(i,3) * B(3,j) &
+ + A3(i,4) * B(4,j) &
+ + A3(i,5) * B(5,j)
+ enddo
+ enddo
+
+ end subroutine mxm5_3comp_singleB
+
+
+!--------------------------------------------------------------------------------------------
+
+ subroutine mxm5_3comp_3dmat_singleB(A1,A2,A3,n1,B,n2,C1,C2,C3,n3)
+
+! 3 different arrays for x/y/z-components, 3-dimensional arrays (5,5,5), same B matrix for all 3 component arrays
+
+ use constants_solver,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n2,n3
+ real(kind=CUSTOM_REAL),dimension(n1,5,n3) :: A1,A2,A3
+ real(kind=CUSTOM_REAL),dimension(5,n2) :: B
+ real(kind=CUSTOM_REAL),dimension(n1,n2,n3) :: C1,C2,C3
+
+ ! local parameters
+ integer :: i,j,k
+
+ ! matrix-matrix multiplication
+ do j=1,n2
+ do i=1,n1
+ ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
+ do k=1,n3
+ C1(i,j,k) = A1(i,1,k) * B(1,j) &
+ + A1(i,2,k) * B(2,j) &
+ + A1(i,3,k) * B(3,j) &
+ + A1(i,4,k) * B(4,j) &
+ + A1(i,5,k) * B(5,j)
+
+ C2(i,j,k) = A2(i,1,k) * B(1,j) &
+ + A2(i,2,k) * B(2,j) &
+ + A2(i,3,k) * B(3,j) &
+ + A2(i,4,k) * B(4,j) &
+ + A2(i,5,k) * B(5,j)
+
+ C3(i,j,k) = A3(i,1,k) * B(1,j) &
+ + A3(i,2,k) * B(2,j) &
+ + A3(i,3,k) * B(3,j) &
+ + A3(i,4,k) * B(4,j) &
+ + A3(i,5,k) * B(5,j)
+ enddo
+ enddo
+ enddo
+
+ end subroutine mxm5_3comp_3dmat_singleB
+
+
end subroutine compute_element_strain_undo_att_Dev
@@ -264,7 +324,7 @@
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC),intent(in) :: &
xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ),intent(out) :: epsilondev_loc
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,5),intent(out) :: epsilondev_loc
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ),intent(out) :: eps_trace_over_3_loc
! local parameters
@@ -276,7 +336,7 @@
real(kind=CUSTOM_REAL) tempz1l,tempz2l,tempz3l
real(kind=CUSTOM_REAL) hp1,hp2,hp3
- real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
+ real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl
real(kind=CUSTOM_REAL) duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl
real(kind=CUSTOM_REAL) duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl
real(kind=CUSTOM_REAL) duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
@@ -321,7 +381,7 @@
tempz3l = tempz3l + displ(3,iglob)*hp3
enddo
-! get derivatives of ux, uy and uz with respect to x, y and z
+ ! get derivatives of ux, uy and uz with respect to x, y and z
xixl = xix(i,j,k,ispec)
xiyl = xiy(i,j,k,ispec)
xizl = xiz(i,j,k,ispec)
@@ -332,10 +392,10 @@
gammayl = gammay(i,j,k,ispec)
gammazl = gammaz(i,j,k,ispec)
-! compute the jacobian
- jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
- - xiyl*(etaxl*gammazl-etazl*gammaxl) &
- + xizl*(etaxl*gammayl-etayl*gammaxl))
+ ! compute the jacobian
+ !jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
+ ! - xiyl*(etaxl*gammazl-etazl*gammaxl) &
+ ! + xizl*(etaxl*gammayl-etayl*gammaxl))
duxdxl = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
duxdyl = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
@@ -349,7 +409,7 @@
duzdyl = xiyl*tempz1l + etayl*tempz2l + gammayl*tempz3l
duzdzl = xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l
-! precompute some sums to save CPU time
+ ! precompute some sums to save CPU time
duxdxl_plus_duydyl = duxdxl + duydyl
duxdxl_plus_duzdzl = duxdxl + duzdzl
duydyl_plus_duzdzl = duydyl + duzdzl
@@ -357,13 +417,13 @@
duzdxl_plus_duxdzl = duzdxl + duxdzl
duzdyl_plus_duydzl = duzdyl + duydzl
-! compute deviatoric strain
+ ! compute deviatoric strain
eps_trace_over_3_loc(i,j,k) = ONE_THIRD * (duxdxl + duydyl + duzdzl)
- epsilondev_loc(1,i,j,k) = duxdxl - eps_trace_over_3_loc(i,j,k)
- epsilondev_loc(2,i,j,k) = duydyl - eps_trace_over_3_loc(i,j,k)
- epsilondev_loc(3,i,j,k) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl
- epsilondev_loc(4,i,j,k) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl
- epsilondev_loc(5,i,j,k) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl
+ epsilondev_loc(i,j,k,1) = duxdxl - eps_trace_over_3_loc(i,j,k)
+ epsilondev_loc(i,j,k,2) = duydyl - eps_trace_over_3_loc(i,j,k)
+ epsilondev_loc(i,j,k,3) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl
+ epsilondev_loc(i,j,k,4) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl
+ epsilondev_loc(i,j,k,5) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl
enddo ! NGLLX
enddo ! NGLLY
@@ -379,7 +439,7 @@
!--------------------------------------------------------------------------------------------
- subroutine compute_element_strain_att_Dev(ispec,nglob,nspec, &
+ subroutine compute_element_strain_att_Dev(ispec,nglob,nspec, &
displ,veloc,deltat, &
ibool, &
hprime_xx,hprime_xxT,&
@@ -416,38 +476,22 @@
! local variable
integer :: iglob
- integer :: i,j,k
real(kind=CUSTOM_REAL) :: templ
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
+
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc,dummyy_loc,dummyz_loc
- real(kind=CUSTOM_REAL), dimension(NGLLX,m2) :: B1_m1_m2_5points,B2_m1_m2_5points,B3_m1_m2_5points
- equivalence(dummyx_loc,B1_m1_m2_5points)
- equivalence(dummyy_loc,B2_m1_m2_5points)
- equivalence(dummyz_loc,B3_m1_m2_5points)
- real(kind=CUSTOM_REAL), dimension(m1,m2) :: C1_m1_m2_5points,C2_m1_m2_5points,C3_m1_m2_5points
- equivalence(tempx1,C1_m1_m2_5points)
- equivalence(tempy1,C2_m1_m2_5points)
- equivalence(tempz1,C3_m1_m2_5points)
- real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
- C1_mxm_m2_m1_5points,C2_mxm_m2_m1_5points,C3_mxm_m2_m1_5points
- equivalence(tempx3,C1_mxm_m2_m1_5points)
- equivalence(tempy3,C2_mxm_m2_m1_5points)
- equivalence(tempz3,C3_mxm_m2_m1_5points)
- real(kind=CUSTOM_REAL), dimension(m2,NGLLX) :: &
- A1_mxm_m2_m1_5points,A2_mxm_m2_m1_5points,A3_mxm_m2_m1_5points
- equivalence(dummyx_loc,A1_mxm_m2_m1_5points)
- equivalence(dummyy_loc,A2_mxm_m2_m1_5points)
- equivalence(dummyz_loc,A3_mxm_m2_m1_5points)
- real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
- real(kind=CUSTOM_REAL) duxdxl,duydyl,duzdzl,duxdyl,duydxl,duzdxl,duxdzl,duzdyl,duydzl,&
- duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl,&
- duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
+ real(kind=CUSTOM_REAL) :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl
+ real(kind=CUSTOM_REAL) :: duxdxl,duydyl,duzdzl,duxdyl,duydxl,duzdxl,duxdzl,duzdyl,duydzl, &
+ duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl, &
+ duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
#ifdef FORCE_VECTORIZATION
integer :: ijk
+#else
+ integer :: i,j,k
#endif
DO_LOOP_IJK
@@ -460,75 +504,13 @@
ENDDO_LOOP_IJK
! deville optimizations
- do j=1,m2
- do i=1,m1
- C1_m1_m2_5points(i,j) = hprime_xx(i,1)*B1_m1_m2_5points(1,j) + &
- hprime_xx(i,2)*B1_m1_m2_5points(2,j) + &
- hprime_xx(i,3)*B1_m1_m2_5points(3,j) + &
- hprime_xx(i,4)*B1_m1_m2_5points(4,j) + &
- hprime_xx(i,5)*B1_m1_m2_5points(5,j)
+ ! computes 1. matrix multiplication for tempx1,..
+ call mxm5_3comp_singleA(hprime_xx,m1,dummyx_loc,dummyy_loc,dummyz_loc,tempx1,tempy1,tempz1,m2)
+ ! computes 2. matrix multiplication for tempx2,..
+ call mxm5_3comp_3dmat_singleB(dummyx_loc,dummyy_loc,dummyz_loc,m1,hprime_xxT,m1,tempx2,tempy2,tempz2,NGLLX)
+ ! computes 3. matrix multiplication for tempx3,..
+ call mxm5_3comp_singleB(dummyx_loc,dummyy_loc,dummyz_loc,m2,hprime_xxT,tempx3,tempy3,tempz3,m1)
- C2_m1_m2_5points(i,j) = hprime_xx(i,1)*B2_m1_m2_5points(1,j) + &
- hprime_xx(i,2)*B2_m1_m2_5points(2,j) + &
- hprime_xx(i,3)*B2_m1_m2_5points(3,j) + &
- hprime_xx(i,4)*B2_m1_m2_5points(4,j) + &
- hprime_xx(i,5)*B2_m1_m2_5points(5,j)
-
- C3_m1_m2_5points(i,j) = hprime_xx(i,1)*B3_m1_m2_5points(1,j) + &
- hprime_xx(i,2)*B3_m1_m2_5points(2,j) + &
- hprime_xx(i,3)*B3_m1_m2_5points(3,j) + &
- hprime_xx(i,4)*B3_m1_m2_5points(4,j) + &
- hprime_xx(i,5)*B3_m1_m2_5points(5,j)
- enddo
- enddo
-
- do j=1,m1
- do i=1,m1
- ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
- do k = 1,NGLLX
- tempx2(i,j,k) = dummyx_loc(i,1,k)*hprime_xxT(1,j) + &
- dummyx_loc(i,2,k)*hprime_xxT(2,j) + &
- dummyx_loc(i,3,k)*hprime_xxT(3,j) + &
- dummyx_loc(i,4,k)*hprime_xxT(4,j) + &
- dummyx_loc(i,5,k)*hprime_xxT(5,j)
-
- tempy2(i,j,k) = dummyy_loc(i,1,k)*hprime_xxT(1,j) + &
- dummyy_loc(i,2,k)*hprime_xxT(2,j) + &
- dummyy_loc(i,3,k)*hprime_xxT(3,j) + &
- dummyy_loc(i,4,k)*hprime_xxT(4,j) + &
- dummyy_loc(i,5,k)*hprime_xxT(5,j)
-
- tempz2(i,j,k) = dummyz_loc(i,1,k)*hprime_xxT(1,j) + &
- dummyz_loc(i,2,k)*hprime_xxT(2,j) + &
- dummyz_loc(i,3,k)*hprime_xxT(3,j) + &
- dummyz_loc(i,4,k)*hprime_xxT(4,j) + &
- dummyz_loc(i,5,k)*hprime_xxT(5,j)
- enddo
- enddo
- enddo
-
- do j=1,m1
- do i=1,m2
- C1_mxm_m2_m1_5points(i,j) = A1_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
- A1_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
- A1_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
- A1_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
- A1_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
-
- C2_mxm_m2_m1_5points(i,j) = A2_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
- A2_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
- A2_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
- A2_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
- A2_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
-
- C3_mxm_m2_m1_5points(i,j) = A3_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
- A3_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
- A3_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
- A3_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
- A3_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
- enddo
- enddo
-
DO_LOOP_IJK
! get derivatives of ux, uy and uz with respect to x, y and z
@@ -543,9 +525,9 @@
gammazl = gammaz(INDEX_IJK,ispec)
! compute the jacobian
- jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
- - xiyl*(etaxl*gammazl-etazl*gammaxl) &
- + xizl*(etaxl*gammayl-etayl*gammaxl))
+ !jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
+ ! - xiyl*(etaxl*gammazl-etazl*gammaxl) &
+ ! + xizl*(etaxl*gammayl-etayl*gammaxl))
duxdxl = xixl*tempx1(INDEX_IJK) + etaxl*tempx2(INDEX_IJK) + gammaxl*tempx3(INDEX_IJK)
duxdyl = xiyl*tempx1(INDEX_IJK) + etayl*tempx2(INDEX_IJK) + gammayl*tempx3(INDEX_IJK)
@@ -583,14 +565,163 @@
ENDDO_LOOP_IJK
- end subroutine compute_element_strain_att_Dev
+ contains
+!--------------------------------------------------------------------------------------------
+!
+! matrix-matrix multiplications
+!
+! subroutines adapted from Deville, Fischer and Mund, High-order methods
+! for incompressible fluid flow, Cambridge University Press (2002),
+! pages 386 and 389 and Figure 8.3.1
+!
+!--------------------------------------------------------------------------------------------
+!
+! note: the matrix-matrix multiplications are used for very small matrices ( 5 x 5 x 5 elements);
+! thus, calling external optimized libraries for these multiplications is in general slower
+!
+! please leave the routines here to help compilers inlining the code
+ subroutine mxm5_3comp_singleA(A,n1,B1,B2,B3,C1,C2,C3,n3)
+
+! 3 different arrays for x/y/z-components, 2-dimensional arrays (25,5)/(5,25), same A matrix for all 3 component arrays
+
+ use constants_solver,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n3
+ real(kind=CUSTOM_REAL),dimension(n1,5) :: A
+ real(kind=CUSTOM_REAL),dimension(5,n3) :: B1,B2,B3
+ real(kind=CUSTOM_REAL),dimension(n1,n3) :: C1,C2,C3
+
+ ! local parameters
+ integer :: i,j
+
+ ! matrix-matrix multiplication
+ do j=1,n3
+ do i=1,n1
+ C1(i,j) = A(i,1) * B1(1,j) &
+ + A(i,2) * B1(2,j) &
+ + A(i,3) * B1(3,j) &
+ + A(i,4) * B1(4,j) &
+ + A(i,5) * B1(5,j)
+
+ C2(i,j) = A(i,1) * B2(1,j) &
+ + A(i,2) * B2(2,j) &
+ + A(i,3) * B2(3,j) &
+ + A(i,4) * B2(4,j) &
+ + A(i,5) * B2(5,j)
+
+ C3(i,j) = A(i,1) * B3(1,j) &
+ + A(i,2) * B3(2,j) &
+ + A(i,3) * B3(3,j) &
+ + A(i,4) * B3(4,j) &
+ + A(i,5) * B3(5,j)
+ enddo
+ enddo
+
+ end subroutine mxm5_3comp_singleA
+
+
+!--------------------------------------------------------------------------------------------
+
+ subroutine mxm5_3comp_singleB(A1,A2,A3,n1,B,C1,C2,C3,n3)
+
+! 3 different arrays for x/y/z-components, 2-dimensional arrays (25,5)/(5,25), same B matrix for all 3 component arrays
+
+ use constants_solver,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n3
+ real(kind=CUSTOM_REAL),dimension(n1,5) :: A1,A2,A3
+ real(kind=CUSTOM_REAL),dimension(5,n3) :: B
+ real(kind=CUSTOM_REAL),dimension(n1,n3) :: C1,C2,C3
+
+ ! local parameters
+ integer :: i,j
+
+ ! matrix-matrix multiplication
+ do j=1,n3
+ do i=1,n1
+ C1(i,j) = A1(i,1) * B(1,j) &
+ + A1(i,2) * B(2,j) &
+ + A1(i,3) * B(3,j) &
+ + A1(i,4) * B(4,j) &
+ + A1(i,5) * B(5,j)
+
+ C2(i,j) = A2(i,1) * B(1,j) &
+ + A2(i,2) * B(2,j) &
+ + A2(i,3) * B(3,j) &
+ + A2(i,4) * B(4,j) &
+ + A2(i,5) * B(5,j)
+
+ C3(i,j) = A3(i,1) * B(1,j) &
+ + A3(i,2) * B(2,j) &
+ + A3(i,3) * B(3,j) &
+ + A3(i,4) * B(4,j) &
+ + A3(i,5) * B(5,j)
+ enddo
+ enddo
+
+ end subroutine mxm5_3comp_singleB
+
+
+!--------------------------------------------------------------------------------------------
+
+ subroutine mxm5_3comp_3dmat_singleB(A1,A2,A3,n1,B,n2,C1,C2,C3,n3)
+
+! 3 different arrays for x/y/z-components, 3-dimensional arrays (5,5,5), same B matrix for all 3 component arrays
+
+ use constants_solver,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n2,n3
+ real(kind=CUSTOM_REAL),dimension(n1,5,n3) :: A1,A2,A3
+ real(kind=CUSTOM_REAL),dimension(5,n2) :: B
+ real(kind=CUSTOM_REAL),dimension(n1,n2,n3) :: C1,C2,C3
+
+ ! local parameters
+ integer :: i,j,k
+
+ ! matrix-matrix multiplication
+ do j=1,n2
+ do i=1,n1
+ ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
+ do k=1,n3
+ C1(i,j,k) = A1(i,1,k) * B(1,j) &
+ + A1(i,2,k) * B(2,j) &
+ + A1(i,3,k) * B(3,j) &
+ + A1(i,4,k) * B(4,j) &
+ + A1(i,5,k) * B(5,j)
+
+ C2(i,j,k) = A2(i,1,k) * B(1,j) &
+ + A2(i,2,k) * B(2,j) &
+ + A2(i,3,k) * B(3,j) &
+ + A2(i,4,k) * B(4,j) &
+ + A2(i,5,k) * B(5,j)
+
+ C3(i,j,k) = A3(i,1,k) * B(1,j) &
+ + A3(i,2,k) * B(2,j) &
+ + A3(i,3,k) * B(3,j) &
+ + A3(i,4,k) * B(4,j) &
+ + A3(i,5,k) * B(5,j)
+ enddo
+ enddo
+ enddo
+
+ end subroutine mxm5_3comp_3dmat_singleB
+
+ end subroutine compute_element_strain_att_Dev
+
+
!
!--------------------------------------------------------------------------------------------
!
- subroutine compute_element_strain_att_noDev(ispec,nglob,nspec, &
+ subroutine compute_element_strain_att_noDev(ispec,nglob,nspec, &
displ,veloc,deltat, &
ibool, &
hprime_xx,hprime_yy,hprime_zz, &
@@ -621,7 +752,6 @@
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: &
xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
- !real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc_nplus1
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: epsilondev_xx_loc_nplus1
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: epsilondev_yy_loc_nplus1
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: epsilondev_xy_loc_nplus1
@@ -642,7 +772,7 @@
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc,dummyy_loc,dummyz_loc
real(kind=CUSTOM_REAL) hp1,hp2,hp3
- real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
+ real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl
real(kind=CUSTOM_REAL) duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl
real(kind=CUSTOM_REAL) duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl
real(kind=CUSTOM_REAL) duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
@@ -706,11 +836,6 @@
gammayl = gammay(i,j,k,ispec)
gammazl = gammaz(i,j,k,ispec)
- ! compute the jacobian
- jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
- - xiyl*(etaxl*gammazl-etazl*gammaxl) &
- + xizl*(etaxl*gammayl-etayl*gammaxl))
-
duxdxl = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
duxdyl = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
duxdzl = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
@@ -748,5 +873,5 @@
enddo
enddo
- end subroutine compute_element_strain_att_noDev
+ end subroutine compute_element_strain_att_noDev
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90 2013-11-01 21:27:52 UTC (rev 22976)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90 2013-11-04 08:16:42 UTC (rev 22977)
@@ -89,7 +89,6 @@
! displacement, velocity and acceleration
real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: displ_crust_mantle
-! real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: veloc_crust_mantle
real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: accel_crust_mantle
! variable sized array variables
@@ -98,8 +97,8 @@
! memory variables for attenuation
! memory variables R_ij are stored at the local rather than global level
! to allow for optimization of cache access by compiler
- real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_ATT) :: R_xx,R_yy,R_xy,R_xz,R_yz
- real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_ATT) :: &
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_ATT) :: R_xx,R_yy,R_xy,R_xz,R_yz
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_ATT) :: &
R_xx_lddrk,R_yy_lddrk,R_xy_lddrk,R_xz_lddrk,R_yz_lddrk
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC) :: &
@@ -107,7 +106,7 @@
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC) :: epsilon_trace_over_3
! [alpha,beta,gamma]val reduced to N_SLS and factor_common to N_SLS*NUM_NODES
- real(kind=CUSTOM_REAL), dimension(N_SLS,ATT1_VAL,ATT2_VAL,ATT3_VAL,vnspec) :: factor_common
+ real(kind=CUSTOM_REAL), dimension(ATT1_VAL,ATT2_VAL,ATT3_VAL,N_SLS,vnspec) :: factor_common
real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval,betaval,gammaval
! inner/outer element run flag
@@ -119,54 +118,28 @@
! manually inline the calls to the Deville et al. (2002) routines
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc,dummyy_loc,dummyz_loc, &
+
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
newtempx1,newtempx2,newtempx3,newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3
- real(kind=CUSTOM_REAL), dimension(NGLLX,m2) :: B1_m1_m2_5points,B2_m1_m2_5points,B3_m1_m2_5points
- real(kind=CUSTOM_REAL), dimension(m1,m2) :: C1_m1_m2_5points,C2_m1_m2_5points,C3_m1_m2_5points
- real(kind=CUSTOM_REAL), dimension(m1,m2) :: E1_m1_m2_5points,E2_m1_m2_5points,E3_m1_m2_5points
- equivalence(dummyx_loc,B1_m1_m2_5points)
- equivalence(dummyy_loc,B2_m1_m2_5points)
- equivalence(dummyz_loc,B3_m1_m2_5points)
- equivalence(tempx1,C1_m1_m2_5points)
- equivalence(tempy1,C2_m1_m2_5points)
- equivalence(tempz1,C3_m1_m2_5points)
- equivalence(newtempx1,E1_m1_m2_5points)
- equivalence(newtempy1,E2_m1_m2_5points)
- equivalence(newtempz1,E3_m1_m2_5points)
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc,dummyy_loc,dummyz_loc
- real(kind=CUSTOM_REAL), dimension(m2,NGLLX) :: &
- A1_mxm_m2_m1_5points,A2_mxm_m2_m1_5points,A3_mxm_m2_m1_5points
- real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
- C1_mxm_m2_m1_5points,C2_mxm_m2_m1_5points,C3_mxm_m2_m1_5points
- real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
- E1_mxm_m2_m1_5points,E2_mxm_m2_m1_5points,E3_mxm_m2_m1_5points
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NDIM) :: sum_terms
- equivalence(dummyx_loc,A1_mxm_m2_m1_5points)
- equivalence(dummyy_loc,A2_mxm_m2_m1_5points)
- equivalence(dummyz_loc,A3_mxm_m2_m1_5points)
- equivalence(tempx3,C1_mxm_m2_m1_5points)
- equivalence(tempy3,C2_mxm_m2_m1_5points)
- equivalence(tempz3,C3_mxm_m2_m1_5points)
- equivalence(newtempx3,E1_mxm_m2_m1_5points)
- equivalence(newtempy3,E2_mxm_m2_m1_5points)
- equivalence(newtempz3,E3_mxm_m2_m1_5points)
-
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sum_terms
-
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,5) :: epsilondev_loc
real(kind=CUSTOM_REAL) fac1,fac2,fac3
! for gravity
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: rho_s_H
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NDIM) :: rho_s_H
integer :: ispec,iglob
- integer :: i,j,k
integer :: num_elements,ispec_p
integer :: iphase
#ifdef FORCE_VECTORIZATION
integer :: ijk
+#else
+ integer :: i,j,k
#endif
! ****************************************************
@@ -200,75 +173,14 @@
! subroutines adapted from Deville, Fischer and Mund, High-order methods
! for incompressible fluid flow, Cambridge University Press (2002),
! pages 386 and 389 and Figure 8.3.1
- do j=1,m2
- do i=1,m1
- C1_m1_m2_5points(i,j) = hprime_xx(i,1)*B1_m1_m2_5points(1,j) + &
- hprime_xx(i,2)*B1_m1_m2_5points(2,j) + &
- hprime_xx(i,3)*B1_m1_m2_5points(3,j) + &
- hprime_xx(i,4)*B1_m1_m2_5points(4,j) + &
- hprime_xx(i,5)*B1_m1_m2_5points(5,j)
- C2_m1_m2_5points(i,j) = hprime_xx(i,1)*B2_m1_m2_5points(1,j) + &
- hprime_xx(i,2)*B2_m1_m2_5points(2,j) + &
- hprime_xx(i,3)*B2_m1_m2_5points(3,j) + &
- hprime_xx(i,4)*B2_m1_m2_5points(4,j) + &
- hprime_xx(i,5)*B2_m1_m2_5points(5,j)
+ ! computes 1. matrix multiplication for tempx1,..
+ call mxm5_3comp_singleA(hprime_xx,m1,dummyx_loc,dummyy_loc,dummyz_loc,tempx1,tempy1,tempz1,m2)
+ ! computes 2. matrix multiplication for tempx2,..
+ call mxm5_3comp_3dmat_singleB(dummyx_loc,dummyy_loc,dummyz_loc,m1,hprime_xxT,m1,tempx2,tempy2,tempz2,NGLLX)
+ ! computes 3. matrix multiplication for tempx3,..
+ call mxm5_3comp_singleB(dummyx_loc,dummyy_loc,dummyz_loc,m2,hprime_xxT,tempx3,tempy3,tempz3,m1)
- C3_m1_m2_5points(i,j) = hprime_xx(i,1)*B3_m1_m2_5points(1,j) + &
- hprime_xx(i,2)*B3_m1_m2_5points(2,j) + &
- hprime_xx(i,3)*B3_m1_m2_5points(3,j) + &
- hprime_xx(i,4)*B3_m1_m2_5points(4,j) + &
- hprime_xx(i,5)*B3_m1_m2_5points(5,j)
- enddo
- enddo
-
- do j=1,m1
- do i=1,m1
- ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
- do k = 1,NGLLX
- tempx2(i,j,k) = dummyx_loc(i,1,k)*hprime_xxT(1,j) + &
- dummyx_loc(i,2,k)*hprime_xxT(2,j) + &
- dummyx_loc(i,3,k)*hprime_xxT(3,j) + &
- dummyx_loc(i,4,k)*hprime_xxT(4,j) + &
- dummyx_loc(i,5,k)*hprime_xxT(5,j)
-
- tempy2(i,j,k) = dummyy_loc(i,1,k)*hprime_xxT(1,j) + &
- dummyy_loc(i,2,k)*hprime_xxT(2,j) + &
- dummyy_loc(i,3,k)*hprime_xxT(3,j) + &
- dummyy_loc(i,4,k)*hprime_xxT(4,j) + &
- dummyy_loc(i,5,k)*hprime_xxT(5,j)
-
- tempz2(i,j,k) = dummyz_loc(i,1,k)*hprime_xxT(1,j) + &
- dummyz_loc(i,2,k)*hprime_xxT(2,j) + &
- dummyz_loc(i,3,k)*hprime_xxT(3,j) + &
- dummyz_loc(i,4,k)*hprime_xxT(4,j) + &
- dummyz_loc(i,5,k)*hprime_xxT(5,j)
- enddo
- enddo
- enddo
-
- do j=1,m1
- do i=1,m2
- C1_mxm_m2_m1_5points(i,j) = A1_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
- A1_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
- A1_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
- A1_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
- A1_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
-
- C2_mxm_m2_m1_5points(i,j) = A2_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
- A2_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
- A2_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
- A2_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
- A2_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
-
- C3_mxm_m2_m1_5points(i,j) = A3_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
- A3_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
- A3_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
- A3_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
- A3_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
- enddo
- enddo
-
!
! compute either isotropic, transverse isotropic or anisotropic elements
!
@@ -326,85 +238,24 @@
! subroutines adapted from Deville, Fischer and Mund, High-order methods
! for incompressible fluid flow, Cambridge University Press (2002),
! pages 386 and 389 and Figure 8.3.1
- do j=1,m2
- do i=1,m1
- E1_m1_m2_5points(i,j) = hprimewgll_xxT(i,1)*C1_m1_m2_5points(1,j) + &
- hprimewgll_xxT(i,2)*C1_m1_m2_5points(2,j) + &
- hprimewgll_xxT(i,3)*C1_m1_m2_5points(3,j) + &
- hprimewgll_xxT(i,4)*C1_m1_m2_5points(4,j) + &
- hprimewgll_xxT(i,5)*C1_m1_m2_5points(5,j)
- E2_m1_m2_5points(i,j) = hprimewgll_xxT(i,1)*C2_m1_m2_5points(1,j) + &
- hprimewgll_xxT(i,2)*C2_m1_m2_5points(2,j) + &
- hprimewgll_xxT(i,3)*C2_m1_m2_5points(3,j) + &
- hprimewgll_xxT(i,4)*C2_m1_m2_5points(4,j) + &
- hprimewgll_xxT(i,5)*C2_m1_m2_5points(5,j)
+ ! computes 1. matrix multiplication for newtempx1,..
+ call mxm5_3comp_singleA(hprimewgll_xxT,m1,tempx1,tempy1,tempz1,newtempx1,newtempy1,newtempz1,m2)
+ ! computes 2. matrix multiplication for newtempx2,..
+ call mxm5_3comp_3dmat_singleB(tempx2,tempy2,tempz2,m1,hprimewgll_xx,m1,newtempx2,newtempy2,newtempz2,NGLLX)
+ ! computes 3. matrix multiplication for newtempx3,..
+ call mxm5_3comp_singleB(tempx3,tempy3,tempz3,m2,hprimewgll_xx,newtempx3,newtempy3,newtempz3,m1)
- E3_m1_m2_5points(i,j) = hprimewgll_xxT(i,1)*C3_m1_m2_5points(1,j) + &
- hprimewgll_xxT(i,2)*C3_m1_m2_5points(2,j) + &
- hprimewgll_xxT(i,3)*C3_m1_m2_5points(3,j) + &
- hprimewgll_xxT(i,4)*C3_m1_m2_5points(4,j) + &
- hprimewgll_xxT(i,5)*C3_m1_m2_5points(5,j)
- enddo
- enddo
+ ! sums contributions
- do i=1,m1
- do j=1,m1
- ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
- do k = 1,NGLLX
- newtempx2(i,j,k) = tempx2(i,1,k)*hprimewgll_xx(1,j) + &
- tempx2(i,2,k)*hprimewgll_xx(2,j) + &
- tempx2(i,3,k)*hprimewgll_xx(3,j) + &
- tempx2(i,4,k)*hprimewgll_xx(4,j) + &
- tempx2(i,5,k)*hprimewgll_xx(5,j)
-
- newtempy2(i,j,k) = tempy2(i,1,k)*hprimewgll_xx(1,j) + &
- tempy2(i,2,k)*hprimewgll_xx(2,j) + &
- tempy2(i,3,k)*hprimewgll_xx(3,j) + &
- tempy2(i,4,k)*hprimewgll_xx(4,j) + &
- tempy2(i,5,k)*hprimewgll_xx(5,j)
-
- newtempz2(i,j,k) = tempz2(i,1,k)*hprimewgll_xx(1,j) + &
- tempz2(i,2,k)*hprimewgll_xx(2,j) + &
- tempz2(i,3,k)*hprimewgll_xx(3,j) + &
- tempz2(i,4,k)*hprimewgll_xx(4,j) + &
- tempz2(i,5,k)*hprimewgll_xx(5,j)
- enddo
- enddo
- enddo
-
- do j=1,m1
- do i=1,m2
- E1_mxm_m2_m1_5points(i,j) = C1_mxm_m2_m1_5points(i,1)*hprimewgll_xx(1,j) + &
- C1_mxm_m2_m1_5points(i,2)*hprimewgll_xx(2,j) + &
- C1_mxm_m2_m1_5points(i,3)*hprimewgll_xx(3,j) + &
- C1_mxm_m2_m1_5points(i,4)*hprimewgll_xx(4,j) + &
- C1_mxm_m2_m1_5points(i,5)*hprimewgll_xx(5,j)
-
- E2_mxm_m2_m1_5points(i,j) = C2_mxm_m2_m1_5points(i,1)*hprimewgll_xx(1,j) + &
- C2_mxm_m2_m1_5points(i,2)*hprimewgll_xx(2,j) + &
- C2_mxm_m2_m1_5points(i,3)*hprimewgll_xx(3,j) + &
- C2_mxm_m2_m1_5points(i,4)*hprimewgll_xx(4,j) + &
- C2_mxm_m2_m1_5points(i,5)*hprimewgll_xx(5,j)
-
- E3_mxm_m2_m1_5points(i,j) = C3_mxm_m2_m1_5points(i,1)*hprimewgll_xx(1,j) + &
- C3_mxm_m2_m1_5points(i,2)*hprimewgll_xx(2,j) + &
- C3_mxm_m2_m1_5points(i,3)*hprimewgll_xx(3,j) + &
- C3_mxm_m2_m1_5points(i,4)*hprimewgll_xx(4,j) + &
- C3_mxm_m2_m1_5points(i,5)*hprimewgll_xx(5,j)
- enddo
- enddo
-
- ! sum contributions
-
DO_LOOP_IJK
fac1 = wgllwgll_yz_3D(INDEX_IJK)
fac2 = wgllwgll_xz_3D(INDEX_IJK)
fac3 = wgllwgll_xy_3D(INDEX_IJK)
- sum_terms(1,INDEX_IJK) = - (fac1*newtempx1(INDEX_IJK) + fac2*newtempx2(INDEX_IJK) + fac3*newtempx3(INDEX_IJK))
- sum_terms(2,INDEX_IJK) = - (fac1*newtempy1(INDEX_IJK) + fac2*newtempy2(INDEX_IJK) + fac3*newtempy3(INDEX_IJK))
- sum_terms(3,INDEX_IJK) = - (fac1*newtempz1(INDEX_IJK) + fac2*newtempz2(INDEX_IJK) + fac3*newtempz3(INDEX_IJK))
+ sum_terms(INDEX_IJK,1) = - (fac1*newtempx1(INDEX_IJK) + fac2*newtempx2(INDEX_IJK) + fac3*newtempx3(INDEX_IJK))
+ sum_terms(INDEX_IJK,2) = - (fac1*newtempy1(INDEX_IJK) + fac2*newtempy2(INDEX_IJK) + fac3*newtempy3(INDEX_IJK))
+ sum_terms(INDEX_IJK,3) = - (fac1*newtempz1(INDEX_IJK) + fac2*newtempz2(INDEX_IJK) + fac3*newtempz3(INDEX_IJK))
ENDDO_LOOP_IJK
@@ -419,9 +270,9 @@
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
- sum_terms(1,INDEX_IJK) = sum_terms(1,INDEX_IJK) + rho_s_H(1,INDEX_IJK)
- sum_terms(2,INDEX_IJK) = sum_terms(2,INDEX_IJK) + rho_s_H(2,INDEX_IJK)
- sum_terms(3,INDEX_IJK) = sum_terms(3,INDEX_IJK) + rho_s_H(3,INDEX_IJK)
+ sum_terms(INDEX_IJK,1) = sum_terms(INDEX_IJK,1) + rho_s_H(INDEX_IJK,1)
+ sum_terms(INDEX_IJK,2) = sum_terms(INDEX_IJK,2) + rho_s_H(INDEX_IJK,2)
+ sum_terms(INDEX_IJK,3) = sum_terms(INDEX_IJK,3) + rho_s_H(INDEX_IJK,3)
enddo
enddo
enddo
@@ -451,9 +302,9 @@
! do NOT use array syntax ":" for the three statements below otherwise most compilers
! will not be able to vectorize the outer loop
- accel_crust_mantle(1,iglob) = accel_crust_mantle(1,iglob) + sum_terms(1,INDEX_IJK)
- accel_crust_mantle(2,iglob) = accel_crust_mantle(2,iglob) + sum_terms(2,INDEX_IJK)
- accel_crust_mantle(3,iglob) = accel_crust_mantle(3,iglob) + sum_terms(3,INDEX_IJK)
+ accel_crust_mantle(1,iglob) = accel_crust_mantle(1,iglob) + sum_terms(INDEX_IJK,1)
+ accel_crust_mantle(2,iglob) = accel_crust_mantle(2,iglob) + sum_terms(INDEX_IJK,2)
+ accel_crust_mantle(3,iglob) = accel_crust_mantle(3,iglob) + sum_terms(INDEX_IJK,3)
#ifdef FORCE_VECTORIZATION
enddo
@@ -484,8 +335,6 @@
R_xx_lddrk,R_yy_lddrk,R_xy_lddrk,R_xz_lddrk,R_yz_lddrk, &
ATT1_VAL,ATT2_VAL,ATT3_VAL,vnspec,factor_common, &
c44store,muvstore, &
-! epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-! epsilondev_xz,epsilondev_yz, &
epsilondev_loc, &
deltat)
else
@@ -501,14 +350,165 @@
! save deviatoric strain for Runge-Kutta scheme
if(COMPUTE_AND_STORE_STRAIN) then
- epsilondev_xx(:,:,:,ispec) = epsilondev_loc(1,:,:,:)
- epsilondev_yy(:,:,:,ispec) = epsilondev_loc(2,:,:,:)
- epsilondev_xy(:,:,:,ispec) = epsilondev_loc(3,:,:,:)
- epsilondev_xz(:,:,:,ispec) = epsilondev_loc(4,:,:,:)
- epsilondev_yz(:,:,:,ispec) = epsilondev_loc(5,:,:,:)
+ epsilondev_xx(:,:,:,ispec) = epsilondev_loc(:,:,:,1)
+ epsilondev_yy(:,:,:,ispec) = epsilondev_loc(:,:,:,2)
+ epsilondev_xy(:,:,:,ispec) = epsilondev_loc(:,:,:,3)
+ epsilondev_xz(:,:,:,ispec) = epsilondev_loc(:,:,:,4)
+ epsilondev_yz(:,:,:,ispec) = epsilondev_loc(:,:,:,5)
endif
enddo ! of spectral element loop NSPEC_CRUST_MANTLE
+ contains
+
+!--------------------------------------------------------------------------------------------
+!
+! matrix-matrix multiplications
+!
+! subroutines adapted from Deville, Fischer and Mund, High-order methods
+! for incompressible fluid flow, Cambridge University Press (2002),
+! pages 386 and 389 and Figure 8.3.1
+!
+!--------------------------------------------------------------------------------------------
+!
+! note: the matrix-matrix multiplications are used for very small matrices ( 5 x 5 x 5 elements);
+! thus, calling external optimized libraries for these multiplications is in general slower
+!
+! please leave the routines here to help compilers inlining the code
+
+ subroutine mxm5_3comp_singleA(A,n1,B1,B2,B3,C1,C2,C3,n3)
+
+! 3 different arrays for x/y/z-components, 2-dimensional arrays (25,5)/(5,25), same A matrix for all 3 component arrays
+
+ use constants_solver,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n3
+ real(kind=CUSTOM_REAL),dimension(n1,5) :: A
+ real(kind=CUSTOM_REAL),dimension(5,n3) :: B1,B2,B3
+ real(kind=CUSTOM_REAL),dimension(n1,n3) :: C1,C2,C3
+
+ ! local parameters
+ integer :: i,j
+
+ ! matrix-matrix multiplication
+ do j=1,n3
+ do i=1,n1
+ C1(i,j) = A(i,1) * B1(1,j) &
+ + A(i,2) * B1(2,j) &
+ + A(i,3) * B1(3,j) &
+ + A(i,4) * B1(4,j) &
+ + A(i,5) * B1(5,j)
+
+ C2(i,j) = A(i,1) * B2(1,j) &
+ + A(i,2) * B2(2,j) &
+ + A(i,3) * B2(3,j) &
+ + A(i,4) * B2(4,j) &
+ + A(i,5) * B2(5,j)
+
+ C3(i,j) = A(i,1) * B3(1,j) &
+ + A(i,2) * B3(2,j) &
+ + A(i,3) * B3(3,j) &
+ + A(i,4) * B3(4,j) &
+ + A(i,5) * B3(5,j)
+ enddo
+ enddo
+
+ end subroutine mxm5_3comp_singleA
+
+
+!--------------------------------------------------------------------------------------------
+
+ subroutine mxm5_3comp_singleB(A1,A2,A3,n1,B,C1,C2,C3,n3)
+
+! 3 different arrays for x/y/z-components, 2-dimensional arrays (25,5)/(5,25), same B matrix for all 3 component arrays
+
+ use constants_solver,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n3
+ real(kind=CUSTOM_REAL),dimension(n1,5) :: A1,A2,A3
+ real(kind=CUSTOM_REAL),dimension(5,n3) :: B
+ real(kind=CUSTOM_REAL),dimension(n1,n3) :: C1,C2,C3
+
+ ! local parameters
+ integer :: i,j
+
+ ! matrix-matrix multiplication
+ do j=1,n3
+ do i=1,n1
+ C1(i,j) = A1(i,1) * B(1,j) &
+ + A1(i,2) * B(2,j) &
+ + A1(i,3) * B(3,j) &
+ + A1(i,4) * B(4,j) &
+ + A1(i,5) * B(5,j)
+
+ C2(i,j) = A2(i,1) * B(1,j) &
+ + A2(i,2) * B(2,j) &
+ + A2(i,3) * B(3,j) &
+ + A2(i,4) * B(4,j) &
+ + A2(i,5) * B(5,j)
+
+ C3(i,j) = A3(i,1) * B(1,j) &
+ + A3(i,2) * B(2,j) &
+ + A3(i,3) * B(3,j) &
+ + A3(i,4) * B(4,j) &
+ + A3(i,5) * B(5,j)
+ enddo
+ enddo
+
+ end subroutine mxm5_3comp_singleB
+
+
+!--------------------------------------------------------------------------------------------
+
+ subroutine mxm5_3comp_3dmat_singleB(A1,A2,A3,n1,B,n2,C1,C2,C3,n3)
+
+! 3 different arrays for x/y/z-components, 3-dimensional arrays (5,5,5), same B matrix for all 3 component arrays
+
+ use constants_solver,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n2,n3
+ real(kind=CUSTOM_REAL),dimension(n1,5,n3) :: A1,A2,A3
+ real(kind=CUSTOM_REAL),dimension(5,n2) :: B
+ real(kind=CUSTOM_REAL),dimension(n1,n2,n3) :: C1,C2,C3
+
+ ! local parameters
+ integer :: i,j,k
+
+ ! matrix-matrix multiplication
+ do j=1,n2
+ do i=1,n1
+ ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
+ do k=1,n3
+ C1(i,j,k) = A1(i,1,k) * B(1,j) &
+ + A1(i,2,k) * B(2,j) &
+ + A1(i,3,k) * B(3,j) &
+ + A1(i,4,k) * B(4,j) &
+ + A1(i,5,k) * B(5,j)
+
+ C2(i,j,k) = A2(i,1,k) * B(1,j) &
+ + A2(i,2,k) * B(2,j) &
+ + A2(i,3,k) * B(3,j) &
+ + A2(i,4,k) * B(4,j) &
+ + A2(i,5,k) * B(5,j)
+
+ C3(i,j,k) = A3(i,1,k) * B(1,j) &
+ + A3(i,2,k) * B(2,j) &
+ + A3(i,3,k) * B(3,j) &
+ + A3(i,4,k) * B(4,j) &
+ + A3(i,5,k) * B(5,j)
+ enddo
+ enddo
+ enddo
+
+ end subroutine mxm5_3comp_3dmat_singleB
+
end subroutine compute_forces_crust_mantle_Dev
+
+
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_noDev.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_noDev.f90 2013-11-01 21:27:52 UTC (rev 22976)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_noDev.f90 2013-11-04 08:16:42 UTC (rev 22977)
@@ -85,8 +85,8 @@
! memory variables for attenuation
! memory variables R_ij are stored at the local rather than global level
! to allow for optimization of cache access by compiler
- real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_ATT) :: R_xx,R_yy,R_xy,R_xz,R_yz
- real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_ATT) :: &
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_ATT) :: R_xx,R_yy,R_xy,R_xz,R_yz
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_ATT) :: &
R_xx_lddrk,R_yy_lddrk,R_xy_lddrk,R_xz_lddrk,R_yz_lddrk
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC) :: &
@@ -94,7 +94,7 @@
real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC) :: epsilon_trace_over_3
! [alpha,beta,gamma]val reduced to N_SLS and factor_common to N_SLS*NUM_NODES
- real(kind=CUSTOM_REAL), dimension(N_SLS,ATT1_VAL,ATT2_VAL,ATT3_VAL,vnspec) :: factor_common
+ real(kind=CUSTOM_REAL), dimension(ATT1_VAL,ATT2_VAL,ATT3_VAL,N_SLS,vnspec) :: factor_common
real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval,betaval,gammaval
! inner/outer element run flag
@@ -105,8 +105,7 @@
! for attenuation
real(kind=CUSTOM_REAL) one_minus_sum_beta_use,minus_sum_beta
real(kind=CUSTOM_REAL) R_xx_val,R_yy_val
-! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: factor_common_c44_muv
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,5) :: epsilondev_loc
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
@@ -141,7 +140,7 @@
real(kind=CUSTOM_REAL) fac1,fac2,fac3
real(kind=CUSTOM_REAL) lambdal,mul,lambdalplus2mul
real(kind=CUSTOM_REAL) kappal,kappavl,kappahl,muvl,muhl
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sum_terms
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NDIM) :: sum_terms
real(kind=CUSTOM_REAL) tempx1l,tempx2l,tempx3l
real(kind=CUSTOM_REAL) tempy1l,tempy2l,tempy3l
@@ -157,7 +156,7 @@
double precision cos_theta_sq,sin_theta_sq,cos_phi_sq,sin_phi_sq
double precision factor,sx_l,sy_l,sz_l,gxl,gyl,gzl
double precision Hxxl,Hyyl,Hzzl,Hxyl,Hxzl,Hyzl
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: rho_s_H
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NDIM) :: rho_s_H
! integer :: computed_elements
integer :: num_elements,ispec_p
@@ -269,11 +268,11 @@
else
epsilon_trace_over_3(i,j,k,ispec) = templ
endif
- epsilondev_loc(1,i,j,k) = duxdxl - templ
- epsilondev_loc(2,i,j,k) = duydyl - templ
- epsilondev_loc(3,i,j,k) = 0.5 * duxdyl_plus_duydxl
- epsilondev_loc(4,i,j,k) = 0.5 * duzdxl_plus_duxdzl
- epsilondev_loc(5,i,j,k) = 0.5 * duzdyl_plus_duydzl
+ epsilondev_loc(i,j,k,1) = duxdxl - templ
+ epsilondev_loc(i,j,k,2) = duydyl - templ
+ epsilondev_loc(i,j,k,3) = 0.5 * duxdyl_plus_duydxl
+ epsilondev_loc(i,j,k,4) = 0.5 * duzdxl_plus_duxdzl
+ epsilondev_loc(i,j,k,5) = 0.5 * duzdyl_plus_duydzl
endif
! precompute terms for attenuation if needed
@@ -588,14 +587,14 @@
! subtract memory variables if attenuation
if(ATTENUATION_VAL .and. .not. PARTIAL_PHYS_DISPERSION_ONLY_VAL ) then
do i_SLS = 1,N_SLS
- R_xx_val = R_xx(i_SLS,i,j,k,ispec)
- R_yy_val = R_yy(i_SLS,i,j,k,ispec)
+ R_xx_val = R_xx(i,j,k,i_SLS,ispec)
+ R_yy_val = R_yy(i,j,k,i_SLS,ispec)
sigma_xx = sigma_xx - R_xx_val
sigma_yy = sigma_yy - R_yy_val
sigma_zz = sigma_zz + R_xx_val + R_yy_val
- sigma_xy = sigma_xy - R_xy(i_SLS,i,j,k,ispec)
- sigma_xz = sigma_xz - R_xz(i_SLS,i,j,k,ispec)
- sigma_yz = sigma_yz - R_yz(i_SLS,i,j,k,ispec)
+ sigma_xy = sigma_xy - R_xy(i,j,k,i_SLS,ispec)
+ sigma_xz = sigma_xz - R_xz(i,j,k,i_SLS,ispec)
+ sigma_yz = sigma_yz - R_yz(i,j,k,i_SLS,ispec)
enddo
endif
@@ -677,9 +676,9 @@
! precompute vector
factor = dble(jacobianl) * wgll_cube(i,j,k)
- rho_s_H(1,i,j,k) = sngl(factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl))
- rho_s_H(2,i,j,k) = sngl(factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl))
- rho_s_H(3,i,j,k) = sngl(factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl))
+ rho_s_H(i,j,k,1) = sngl(factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl))
+ rho_s_H(i,j,k,2) = sngl(factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl))
+ rho_s_H(i,j,k,3) = sngl(factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl))
else
@@ -704,9 +703,9 @@
! precompute vector
factor = jacobianl * wgll_cube(i,j,k)
- rho_s_H(1,i,j,k) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
- rho_s_H(2,i,j,k) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
- rho_s_H(3,i,j,k) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
+ rho_s_H(i,j,k,1) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
+ rho_s_H(i,j,k,2) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
+ rho_s_H(i,j,k,3) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
endif
@@ -770,11 +769,15 @@
fac2 = wgllwgll_xz(i,k)
fac3 = wgllwgll_xy(i,j)
- sum_terms(1,i,j,k) = - (fac1*tempx1l + fac2*tempx2l + fac3*tempx3l)
- sum_terms(2,i,j,k) = - (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l)
- sum_terms(3,i,j,k) = - (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l)
+ sum_terms(i,j,k,1) = - (fac1*tempx1l + fac2*tempx2l + fac3*tempx3l)
+ sum_terms(i,j,k,2) = - (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l)
+ sum_terms(i,j,k,3) = - (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l)
- if(GRAVITY_VAL) sum_terms(:,i,j,k) = sum_terms(:,i,j,k) + rho_s_H(:,i,j,k)
+ if(GRAVITY_VAL) then
+ sum_terms(i,j,k,1) = sum_terms(i,j,k,1) + rho_s_H(i,j,k,1)
+ sum_terms(i,j,k,2) = sum_terms(i,j,k,2) + rho_s_H(i,j,k,2)
+ sum_terms(i,j,k,3) = sum_terms(i,j,k,3) + rho_s_H(i,j,k,3)
+ endif
enddo ! NGLLX
enddo ! NGLLY
@@ -785,9 +788,9 @@
do j=1,NGLLY
do i=1,NGLLX
iglob = ibool(i,j,k,ispec)
- accel_crust_mantle(1,iglob) = accel_crust_mantle(1,iglob) + sum_terms(1,i,j,k)
- accel_crust_mantle(2,iglob) = accel_crust_mantle(2,iglob) + sum_terms(2,i,j,k)
- accel_crust_mantle(3,iglob) = accel_crust_mantle(3,iglob) + sum_terms(3,i,j,k)
+ accel_crust_mantle(1,iglob) = accel_crust_mantle(1,iglob) + sum_terms(i,j,k,1)
+ accel_crust_mantle(2,iglob) = accel_crust_mantle(2,iglob) + sum_terms(i,j,k,2)
+ accel_crust_mantle(3,iglob) = accel_crust_mantle(3,iglob) + sum_terms(i,j,k,3)
enddo
enddo
enddo
@@ -814,8 +817,6 @@
R_xx_lddrk,R_yy_lddrk,R_xy_lddrk,R_xz_lddrk,R_yz_lddrk, &
ATT1_VAL,ATT2_VAL,ATT3_VAL,vnspec,factor_common, &
c44store,muvstore, &
-! epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-! epsilondev_xz,epsilondev_yz, &
epsilondev_loc, &
deltat)
else
@@ -835,11 +836,11 @@
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
- epsilondev_xx(i,j,k,ispec) = epsilondev_loc(1,i,j,k)
- epsilondev_yy(i,j,k,ispec) = epsilondev_loc(2,i,j,k)
- epsilondev_xy(i,j,k,ispec) = epsilondev_loc(3,i,j,k)
- epsilondev_xz(i,j,k,ispec) = epsilondev_loc(4,i,j,k)
- epsilondev_yz(i,j,k,ispec) = epsilondev_loc(5,i,j,k)
+ epsilondev_xx(i,j,k,ispec) = epsilondev_loc(i,j,k,1)
+ epsilondev_yy(i,j,k,ispec) = epsilondev_loc(i,j,k,2)
+ epsilondev_xy(i,j,k,ispec) = epsilondev_loc(i,j,k,3)
+ epsilondev_xz(i,j,k,ispec) = epsilondev_loc(i,j,k,4)
+ epsilondev_yz(i,j,k,ispec) = epsilondev_loc(i,j,k,5)
enddo
enddo
enddo
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90 2013-11-01 21:27:52 UTC (rev 22976)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90 2013-11-04 08:16:42 UTC (rev 22977)
@@ -87,13 +87,12 @@
! to allow for optimization of cache access by compiler
! variable lengths for factor_common and one_minus_sum_beta
integer :: vnspec
- real(kind=CUSTOM_REAL), dimension(N_SLS,ATT1_VAL,ATT2_VAL,ATT3_VAL,vnspec) :: factor_common
+ real(kind=CUSTOM_REAL), dimension(ATT1_VAL,ATT2_VAL,ATT3_VAL,N_SLS,vnspec) :: factor_common
real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval,betaval,gammaval
- real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_ATT) :: &
- R_xx,R_yy,R_xy,R_xz,R_yz
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_ATT) :: R_xx,R_yy,R_xy,R_xz,R_yz
- real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_ATT) :: &
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_ATT) :: &
R_xx_lddrk,R_yy_lddrk,R_xy_lddrk,R_xz_lddrk,R_yz_lddrk
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC) :: &
@@ -104,49 +103,16 @@
logical :: phase_is_inner
! local parameters
-
! Deville
- ! manually inline the calls to the Deville et al. (2002) routines
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc,dummyy_loc,dummyz_loc
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc,dummyy_loc,dummyz_loc, &
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
newtempx1,newtempx2,newtempx3,newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3
- real(kind=CUSTOM_REAL), dimension(NGLLX,m2) :: B1_m1_m2_5points,B2_m1_m2_5points,B3_m1_m2_5points
- real(kind=CUSTOM_REAL), dimension(m1,m2) :: C1_m1_m2_5points,C2_m1_m2_5points,C3_m1_m2_5points
- real(kind=CUSTOM_REAL), dimension(m1,m2) :: E1_m1_m2_5points,E2_m1_m2_5points,E3_m1_m2_5points
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NDIM) :: sum_terms
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,5) :: epsilondev_loc
- equivalence(dummyx_loc,B1_m1_m2_5points)
- equivalence(dummyy_loc,B2_m1_m2_5points)
- equivalence(dummyz_loc,B3_m1_m2_5points)
- equivalence(tempx1,C1_m1_m2_5points)
- equivalence(tempy1,C2_m1_m2_5points)
- equivalence(tempz1,C3_m1_m2_5points)
- equivalence(newtempx1,E1_m1_m2_5points)
- equivalence(newtempy1,E2_m1_m2_5points)
- equivalence(newtempz1,E3_m1_m2_5points)
-
- real(kind=CUSTOM_REAL), dimension(m2,NGLLX) :: &
- A1_mxm_m2_m1_5points,A2_mxm_m2_m1_5points,A3_mxm_m2_m1_5points
- real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
- C1_mxm_m2_m1_5points,C2_mxm_m2_m1_5points,C3_mxm_m2_m1_5points
- real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
- E1_mxm_m2_m1_5points,E2_mxm_m2_m1_5points,E3_mxm_m2_m1_5points
-
- equivalence(dummyx_loc,A1_mxm_m2_m1_5points)
- equivalence(dummyy_loc,A2_mxm_m2_m1_5points)
- equivalence(dummyz_loc,A3_mxm_m2_m1_5points)
- equivalence(tempx3,C1_mxm_m2_m1_5points)
- equivalence(tempy3,C2_mxm_m2_m1_5points)
- equivalence(tempz3,C3_mxm_m2_m1_5points)
- equivalence(newtempx3,E1_mxm_m2_m1_5points)
- equivalence(newtempy3,E2_mxm_m2_m1_5points)
- equivalence(newtempz3,E3_mxm_m2_m1_5points)
-
-
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sum_terms
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
-
real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
real(kind=CUSTOM_REAL) duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl
@@ -170,12 +136,11 @@
double precision cos_theta_sq,sin_theta_sq,cos_phi_sq,sin_phi_sq
double precision theta,phi,factor,gxl,gyl,gzl,sx_l,sy_l,sz_l
double precision Hxxl,Hyyl,Hzzl,Hxyl,Hxzl,Hyzl
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: rho_s_H
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NDIM) :: rho_s_H
real(kind=CUSTOM_REAL) sigma_yx,sigma_zx,sigma_zy
integer :: int_radius
integer :: ispec,iglob
- integer :: i,j,k
integer :: num_elements,ispec_p
integer :: iphase
@@ -185,6 +150,8 @@
! in all known applications, and in the main program we check that N_SLS == 3 if FORCE_VECTORIZATION is used and we stop
integer :: ijk
real(kind=CUSTOM_REAL) :: R_xx_val,R_yy_val
+#else
+ integer :: i,j,k
#endif
! ****************************************************
@@ -222,75 +189,83 @@
! subroutines adapted from Deville, Fischer and Mund, High-order methods
! for incompressible fluid flow, Cambridge University Press (2002),
! pages 386 and 389 and Figure 8.3.1
- do j=1,m2
- do i=1,m1
- C1_m1_m2_5points(i,j) = hprime_xx(i,1)*B1_m1_m2_5points(1,j) + &
- hprime_xx(i,2)*B1_m1_m2_5points(2,j) + &
- hprime_xx(i,3)*B1_m1_m2_5points(3,j) + &
- hprime_xx(i,4)*B1_m1_m2_5points(4,j) + &
- hprime_xx(i,5)*B1_m1_m2_5points(5,j)
- C2_m1_m2_5points(i,j) = hprime_xx(i,1)*B2_m1_m2_5points(1,j) + &
- hprime_xx(i,2)*B2_m1_m2_5points(2,j) + &
- hprime_xx(i,3)*B2_m1_m2_5points(3,j) + &
- hprime_xx(i,4)*B2_m1_m2_5points(4,j) + &
- hprime_xx(i,5)*B2_m1_m2_5points(5,j)
+ ! computes 1. matrix multiplication for tempx1,..
+ call mxm5_3comp_singleA(hprime_xx,m1,dummyx_loc,dummyy_loc,dummyz_loc,tempx1,tempy1,tempz1,m2)
+ ! computes 2. matrix multiplication for tempx2,..
+ call mxm5_3comp_3dmat_singleB(dummyx_loc,dummyy_loc,dummyz_loc,m1,hprime_xxT,m1,tempx2,tempy2,tempz2,NGLLX)
+ ! computes 3. matrix multiplication for tempx3,..
+ call mxm5_3comp_singleB(dummyx_loc,dummyy_loc,dummyz_loc,m2,hprime_xxT,tempx3,tempy3,tempz3,m1)
- C3_m1_m2_5points(i,j) = hprime_xx(i,1)*B3_m1_m2_5points(1,j) + &
- hprime_xx(i,2)*B3_m1_m2_5points(2,j) + &
- hprime_xx(i,3)*B3_m1_m2_5points(3,j) + &
- hprime_xx(i,4)*B3_m1_m2_5points(4,j) + &
- hprime_xx(i,5)*B3_m1_m2_5points(5,j)
- enddo
- enddo
-
- do j=1,m1
- do i=1,m1
- ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
- do k = 1,NGLLX
- tempx2(i,j,k) = dummyx_loc(i,1,k)*hprime_xxT(1,j) + &
- dummyx_loc(i,2,k)*hprime_xxT(2,j) + &
- dummyx_loc(i,3,k)*hprime_xxT(3,j) + &
- dummyx_loc(i,4,k)*hprime_xxT(4,j) + &
- dummyx_loc(i,5,k)*hprime_xxT(5,j)
-
- tempy2(i,j,k) = dummyy_loc(i,1,k)*hprime_xxT(1,j) + &
- dummyy_loc(i,2,k)*hprime_xxT(2,j) + &
- dummyy_loc(i,3,k)*hprime_xxT(3,j) + &
- dummyy_loc(i,4,k)*hprime_xxT(4,j) + &
- dummyy_loc(i,5,k)*hprime_xxT(5,j)
-
- tempz2(i,j,k) = dummyz_loc(i,1,k)*hprime_xxT(1,j) + &
- dummyz_loc(i,2,k)*hprime_xxT(2,j) + &
- dummyz_loc(i,3,k)*hprime_xxT(3,j) + &
- dummyz_loc(i,4,k)*hprime_xxT(4,j) + &
- dummyz_loc(i,5,k)*hprime_xxT(5,j)
- enddo
- enddo
- enddo
-
- do j=1,m1
- do i=1,m2
- C1_mxm_m2_m1_5points(i,j) = A1_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
- A1_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
- A1_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
- A1_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
- A1_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
-
- C2_mxm_m2_m1_5points(i,j) = A2_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
- A2_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
- A2_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
- A2_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
- A2_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
-
- C3_mxm_m2_m1_5points(i,j) = A3_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
- A3_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
- A3_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
- A3_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
- A3_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
- enddo
- enddo
-
+! do j=1,m2
+! do i=1,m1
+! C1_m1_m2_5points(i,j) = hprime_xx(i,1)*B1_m1_m2_5points(1,j) + &
+! hprime_xx(i,2)*B1_m1_m2_5points(2,j) + &
+! hprime_xx(i,3)*B1_m1_m2_5points(3,j) + &
+! hprime_xx(i,4)*B1_m1_m2_5points(4,j) + &
+! hprime_xx(i,5)*B1_m1_m2_5points(5,j)
+!
+! C2_m1_m2_5points(i,j) = hprime_xx(i,1)*B2_m1_m2_5points(1,j) + &
+! hprime_xx(i,2)*B2_m1_m2_5points(2,j) + &
+! hprime_xx(i,3)*B2_m1_m2_5points(3,j) + &
+! hprime_xx(i,4)*B2_m1_m2_5points(4,j) + &
+! hprime_xx(i,5)*B2_m1_m2_5points(5,j)
+!
+! C3_m1_m2_5points(i,j) = hprime_xx(i,1)*B3_m1_m2_5points(1,j) + &
+! hprime_xx(i,2)*B3_m1_m2_5points(2,j) + &
+! hprime_xx(i,3)*B3_m1_m2_5points(3,j) + &
+! hprime_xx(i,4)*B3_m1_m2_5points(4,j) + &
+! hprime_xx(i,5)*B3_m1_m2_5points(5,j)
+! enddo
+! enddo
+!
+! do j=1,m1
+! do i=1,m1
+! ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
+! do k = 1,NGLLX
+! tempx2(i,j,k) = dummyx_loc(i,1,k)*hprime_xxT(1,j) + &
+! dummyx_loc(i,2,k)*hprime_xxT(2,j) + &
+! dummyx_loc(i,3,k)*hprime_xxT(3,j) + &
+! dummyx_loc(i,4,k)*hprime_xxT(4,j) + &
+! dummyx_loc(i,5,k)*hprime_xxT(5,j)
+!
+! tempy2(i,j,k) = dummyy_loc(i,1,k)*hprime_xxT(1,j) + &
+! dummyy_loc(i,2,k)*hprime_xxT(2,j) + &
+! dummyy_loc(i,3,k)*hprime_xxT(3,j) + &
+! dummyy_loc(i,4,k)*hprime_xxT(4,j) + &
+! dummyy_loc(i,5,k)*hprime_xxT(5,j)
+!
+! tempz2(i,j,k) = dummyz_loc(i,1,k)*hprime_xxT(1,j) + &
+! dummyz_loc(i,2,k)*hprime_xxT(2,j) + &
+! dummyz_loc(i,3,k)*hprime_xxT(3,j) + &
+! dummyz_loc(i,4,k)*hprime_xxT(4,j) + &
+! dummyz_loc(i,5,k)*hprime_xxT(5,j)
+! enddo
+! enddo
+! enddo
+!
+! do j=1,m1
+! do i=1,m2
+! C1_mxm_m2_m1_5points(i,j) = A1_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
+! A1_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
+! A1_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
+! A1_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
+! A1_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
+!
+! C2_mxm_m2_m1_5points(i,j) = A2_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
+! A2_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
+! A2_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
+! A2_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
+! A2_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
+!
+! C3_mxm_m2_m1_5points(i,j) = A3_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
+! A3_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
+! A3_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
+! A3_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
+! A3_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
+! enddo
+! enddo
+!
DO_LOOP_IJK
! get derivatives of ux, uy and uz with respect to x, y and z
@@ -339,11 +314,11 @@
else
epsilon_trace_over_3(INDEX_IJK,ispec) = templ
endif
- epsilondev_loc(1,INDEX_IJK) = duxdxl - templ
- epsilondev_loc(2,INDEX_IJK) = duydyl - templ
- epsilondev_loc(3,INDEX_IJK) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl
- epsilondev_loc(4,INDEX_IJK) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl
- epsilondev_loc(5,INDEX_IJK) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl
+ epsilondev_loc(INDEX_IJK,1) = duxdxl - templ
+ epsilondev_loc(INDEX_IJK,2) = duydyl - templ
+ epsilondev_loc(INDEX_IJK,3) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl
+ epsilondev_loc(INDEX_IJK,4) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl
+ epsilondev_loc(INDEX_IJK,5) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl
endif
if(ANISOTROPIC_INNER_CORE_VAL) then
@@ -428,39 +403,36 @@
! here we assume that N_SLS == 3 in order to be able to unroll and suppress the loop
! in order to vectorize the outer loop
- R_xx_val = R_xx(1,INDEX_IJK,ispec)
- R_yy_val = R_yy(1,INDEX_IJK,ispec)
+ R_xx_val = R_xx(INDEX_IJK,1,ispec)
+ R_yy_val = R_yy(INDEX_IJK,1,ispec)
sigma_xx = sigma_xx - R_xx_val
sigma_yy = sigma_yy - R_yy_val
sigma_zz = sigma_zz + R_xx_val + R_yy_val
- sigma_xy = sigma_xy - R_xy(1,INDEX_IJK,ispec)
- sigma_xz = sigma_xz - R_xz(1,INDEX_IJK,ispec)
- sigma_yz = sigma_yz - R_yz(1,INDEX_IJK,ispec)
+ sigma_xy = sigma_xy - R_xy(INDEX_IJK,1,ispec)
+ sigma_xz = sigma_xz - R_xz(INDEX_IJK,1,ispec)
+ sigma_yz = sigma_yz - R_yz(INDEX_IJK,1,ispec)
- R_xx_val = R_xx(2,INDEX_IJK,ispec)
- R_yy_val = R_yy(2,INDEX_IJK,ispec)
+ R_xx_val = R_xx(INDEX_IJK,2,ispec)
+ R_yy_val = R_yy(INDEX_IJK,2,ispec)
sigma_xx = sigma_xx - R_xx_val
sigma_yy = sigma_yy - R_yy_val
sigma_zz = sigma_zz + R_xx_val + R_yy_val
- sigma_xy = sigma_xy - R_xy(2,INDEX_IJK,ispec)
- sigma_xz = sigma_xz - R_xz(2,INDEX_IJK,ispec)
- sigma_yz = sigma_yz - R_yz(2,INDEX_IJK,ispec)
+ sigma_xy = sigma_xy - R_xy(INDEX_IJK,2,ispec)
+ sigma_xz = sigma_xz - R_xz(INDEX_IJK,2,ispec)
+ sigma_yz = sigma_yz - R_yz(INDEX_IJK,2,ispec)
- R_xx_val = R_xx(3,INDEX_IJK,ispec)
- R_yy_val = R_yy(3,INDEX_IJK,ispec)
+ R_xx_val = R_xx(INDEX_IJK,3,ispec)
+ R_yy_val = R_yy(INDEX_IJK,3,ispec)
sigma_xx = sigma_xx - R_xx_val
sigma_yy = sigma_yy - R_yy_val
sigma_zz = sigma_zz + R_xx_val + R_yy_val
- sigma_xy = sigma_xy - R_xy(3,INDEX_IJK,ispec)
- sigma_xz = sigma_xz - R_xz(3,INDEX_IJK,ispec)
- sigma_yz = sigma_yz - R_yz(3,INDEX_IJK,ispec)
+ sigma_xy = sigma_xy - R_xy(INDEX_IJK,3,ispec)
+ sigma_xz = sigma_xz - R_xz(INDEX_IJK,3,ispec)
+ sigma_yz = sigma_yz - R_yz(INDEX_IJK,3,ispec)
#else
! note: Fortran passes pointers to array location, thus R_memory(1,1,...) is fine
- call compute_element_att_stress(R_xx(1,INDEX_IJK,ispec), &
- R_yy(1,INDEX_IJK,ispec), &
- R_xy(1,INDEX_IJK,ispec), &
- R_xz(1,INDEX_IJK,ispec), &
- R_yz(1,INDEX_IJK,ispec), &
+ call compute_element_att_stress(i,j,k,R_xx(1,1,1,1,ispec),R_yy(1,1,1,1,ispec),R_xy(1,1,1,1,ispec), &
+ R_xz(1,1,1,1,ispec),R_yz(1,1,1,1,ispec), &
sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz)
#endif
endif
@@ -546,9 +518,9 @@
! precompute vector
factor = dble(jacobianl) * wgll_cube(INDEX_IJK)
- rho_s_H(1,INDEX_IJK) = sngl(factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl))
- rho_s_H(2,INDEX_IJK) = sngl(factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl))
- rho_s_H(3,INDEX_IJK) = sngl(factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl))
+ rho_s_H(INDEX_IJK,1) = sngl(factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl))
+ rho_s_H(INDEX_IJK,2) = sngl(factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl))
+ rho_s_H(INDEX_IJK,3) = sngl(factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl))
else
@@ -573,9 +545,9 @@
! precompute vector
factor = jacobianl * wgll_cube(INDEX_IJK)
- rho_s_H(1,INDEX_IJK) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
- rho_s_H(2,INDEX_IJK) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
- rho_s_H(3,INDEX_IJK) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
+ rho_s_H(INDEX_IJK,1) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
+ rho_s_H(INDEX_IJK,2) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
+ rho_s_H(INDEX_IJK,3) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
endif
@@ -599,75 +571,83 @@
! subroutines adapted from Deville, Fischer and Mund, High-order methods
! for incompressible fluid flow, Cambridge University Press (2002),
! pages 386 and 389 and Figure 8.3.1
- do j=1,m2
- do i=1,m1
- E1_m1_m2_5points(i,j) = hprimewgll_xxT(i,1)*C1_m1_m2_5points(1,j) + &
- hprimewgll_xxT(i,2)*C1_m1_m2_5points(2,j) + &
- hprimewgll_xxT(i,3)*C1_m1_m2_5points(3,j) + &
- hprimewgll_xxT(i,4)*C1_m1_m2_5points(4,j) + &
- hprimewgll_xxT(i,5)*C1_m1_m2_5points(5,j)
- E2_m1_m2_5points(i,j) = hprimewgll_xxT(i,1)*C2_m1_m2_5points(1,j) + &
- hprimewgll_xxT(i,2)*C2_m1_m2_5points(2,j) + &
- hprimewgll_xxT(i,3)*C2_m1_m2_5points(3,j) + &
- hprimewgll_xxT(i,4)*C2_m1_m2_5points(4,j) + &
- hprimewgll_xxT(i,5)*C2_m1_m2_5points(5,j)
+ ! computes 1. matrix multiplication for newtempx1,..
+ call mxm5_3comp_singleA(hprimewgll_xxT,m1,tempx1,tempy1,tempz1,newtempx1,newtempy1,newtempz1,m2)
+ ! computes 2. matrix multiplication for newtempx2,..
+ call mxm5_3comp_3dmat_singleB(tempx2,tempy2,tempz2,m1,hprimewgll_xx,m1,newtempx2,newtempy2,newtempz2,NGLLX)
+ ! computes 3. matrix multiplication for newtempx3,..
+ call mxm5_3comp_singleB(tempx3,tempy3,tempz3,m2,hprimewgll_xx,newtempx3,newtempy3,newtempz3,m1)
- E3_m1_m2_5points(i,j) = hprimewgll_xxT(i,1)*C3_m1_m2_5points(1,j) + &
- hprimewgll_xxT(i,2)*C3_m1_m2_5points(2,j) + &
- hprimewgll_xxT(i,3)*C3_m1_m2_5points(3,j) + &
- hprimewgll_xxT(i,4)*C3_m1_m2_5points(4,j) + &
- hprimewgll_xxT(i,5)*C3_m1_m2_5points(5,j)
- enddo
- enddo
-
- do i=1,m1
- do j=1,m1
- ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
- do k = 1,NGLLX
- newtempx2(i,j,k) = tempx2(i,1,k)*hprimewgll_xx(1,j) + &
- tempx2(i,2,k)*hprimewgll_xx(2,j) + &
- tempx2(i,3,k)*hprimewgll_xx(3,j) + &
- tempx2(i,4,k)*hprimewgll_xx(4,j) + &
- tempx2(i,5,k)*hprimewgll_xx(5,j)
-
- newtempy2(i,j,k) = tempy2(i,1,k)*hprimewgll_xx(1,j) + &
- tempy2(i,2,k)*hprimewgll_xx(2,j) + &
- tempy2(i,3,k)*hprimewgll_xx(3,j) + &
- tempy2(i,4,k)*hprimewgll_xx(4,j) + &
- tempy2(i,5,k)*hprimewgll_xx(5,j)
-
- newtempz2(i,j,k) = tempz2(i,1,k)*hprimewgll_xx(1,j) + &
- tempz2(i,2,k)*hprimewgll_xx(2,j) + &
- tempz2(i,3,k)*hprimewgll_xx(3,j) + &
- tempz2(i,4,k)*hprimewgll_xx(4,j) + &
- tempz2(i,5,k)*hprimewgll_xx(5,j)
- enddo
- enddo
- enddo
-
- do j=1,m1
- do i=1,m2
- E1_mxm_m2_m1_5points(i,j) = C1_mxm_m2_m1_5points(i,1)*hprimewgll_xx(1,j) + &
- C1_mxm_m2_m1_5points(i,2)*hprimewgll_xx(2,j) + &
- C1_mxm_m2_m1_5points(i,3)*hprimewgll_xx(3,j) + &
- C1_mxm_m2_m1_5points(i,4)*hprimewgll_xx(4,j) + &
- C1_mxm_m2_m1_5points(i,5)*hprimewgll_xx(5,j)
-
- E2_mxm_m2_m1_5points(i,j) = C2_mxm_m2_m1_5points(i,1)*hprimewgll_xx(1,j) + &
- C2_mxm_m2_m1_5points(i,2)*hprimewgll_xx(2,j) + &
- C2_mxm_m2_m1_5points(i,3)*hprimewgll_xx(3,j) + &
- C2_mxm_m2_m1_5points(i,4)*hprimewgll_xx(4,j) + &
- C2_mxm_m2_m1_5points(i,5)*hprimewgll_xx(5,j)
-
- E3_mxm_m2_m1_5points(i,j) = C3_mxm_m2_m1_5points(i,1)*hprimewgll_xx(1,j) + &
- C3_mxm_m2_m1_5points(i,2)*hprimewgll_xx(2,j) + &
- C3_mxm_m2_m1_5points(i,3)*hprimewgll_xx(3,j) + &
- C3_mxm_m2_m1_5points(i,4)*hprimewgll_xx(4,j) + &
- C3_mxm_m2_m1_5points(i,5)*hprimewgll_xx(5,j)
- enddo
- enddo
-
+! do j=1,m2
+! do i=1,m1
+! E1_m1_m2_5points(i,j) = hprimewgll_xxT(i,1)*C1_m1_m2_5points(1,j) + &
+! hprimewgll_xxT(i,2)*C1_m1_m2_5points(2,j) + &
+! hprimewgll_xxT(i,3)*C1_m1_m2_5points(3,j) + &
+! hprimewgll_xxT(i,4)*C1_m1_m2_5points(4,j) + &
+! hprimewgll_xxT(i,5)*C1_m1_m2_5points(5,j)
+!
+! E2_m1_m2_5points(i,j) = hprimewgll_xxT(i,1)*C2_m1_m2_5points(1,j) + &
+! hprimewgll_xxT(i,2)*C2_m1_m2_5points(2,j) + &
+! hprimewgll_xxT(i,3)*C2_m1_m2_5points(3,j) + &
+! hprimewgll_xxT(i,4)*C2_m1_m2_5points(4,j) + &
+! hprimewgll_xxT(i,5)*C2_m1_m2_5points(5,j)
+!
+! E3_m1_m2_5points(i,j) = hprimewgll_xxT(i,1)*C3_m1_m2_5points(1,j) + &
+! hprimewgll_xxT(i,2)*C3_m1_m2_5points(2,j) + &
+! hprimewgll_xxT(i,3)*C3_m1_m2_5points(3,j) + &
+! hprimewgll_xxT(i,4)*C3_m1_m2_5points(4,j) + &
+! hprimewgll_xxT(i,5)*C3_m1_m2_5points(5,j)
+! enddo
+! enddo
+!
+! do i=1,m1
+! do j=1,m1
+! ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
+! do k = 1,NGLLX
+! newtempx2(i,j,k) = tempx2(i,1,k)*hprimewgll_xx(1,j) + &
+! tempx2(i,2,k)*hprimewgll_xx(2,j) + &
+! tempx2(i,3,k)*hprimewgll_xx(3,j) + &
+! tempx2(i,4,k)*hprimewgll_xx(4,j) + &
+! tempx2(i,5,k)*hprimewgll_xx(5,j)
+!
+! newtempy2(i,j,k) = tempy2(i,1,k)*hprimewgll_xx(1,j) + &
+! tempy2(i,2,k)*hprimewgll_xx(2,j) + &
+! tempy2(i,3,k)*hprimewgll_xx(3,j) + &
+! tempy2(i,4,k)*hprimewgll_xx(4,j) + &
+! tempy2(i,5,k)*hprimewgll_xx(5,j)
+!
+! newtempz2(i,j,k) = tempz2(i,1,k)*hprimewgll_xx(1,j) + &
+! tempz2(i,2,k)*hprimewgll_xx(2,j) + &
+! tempz2(i,3,k)*hprimewgll_xx(3,j) + &
+! tempz2(i,4,k)*hprimewgll_xx(4,j) + &
+! tempz2(i,5,k)*hprimewgll_xx(5,j)
+! enddo
+! enddo
+! enddo
+!
+! do j=1,m1
+! do i=1,m2
+! E1_mxm_m2_m1_5points(i,j) = C1_mxm_m2_m1_5points(i,1)*hprimewgll_xx(1,j) + &
+! C1_mxm_m2_m1_5points(i,2)*hprimewgll_xx(2,j) + &
+! C1_mxm_m2_m1_5points(i,3)*hprimewgll_xx(3,j) + &
+! C1_mxm_m2_m1_5points(i,4)*hprimewgll_xx(4,j) + &
+! C1_mxm_m2_m1_5points(i,5)*hprimewgll_xx(5,j)
+!
+! E2_mxm_m2_m1_5points(i,j) = C2_mxm_m2_m1_5points(i,1)*hprimewgll_xx(1,j) + &
+! C2_mxm_m2_m1_5points(i,2)*hprimewgll_xx(2,j) + &
+! C2_mxm_m2_m1_5points(i,3)*hprimewgll_xx(3,j) + &
+! C2_mxm_m2_m1_5points(i,4)*hprimewgll_xx(4,j) + &
+! C2_mxm_m2_m1_5points(i,5)*hprimewgll_xx(5,j)
+!
+! E3_mxm_m2_m1_5points(i,j) = C3_mxm_m2_m1_5points(i,1)*hprimewgll_xx(1,j) + &
+! C3_mxm_m2_m1_5points(i,2)*hprimewgll_xx(2,j) + &
+! C3_mxm_m2_m1_5points(i,3)*hprimewgll_xx(3,j) + &
+! C3_mxm_m2_m1_5points(i,4)*hprimewgll_xx(4,j) + &
+! C3_mxm_m2_m1_5points(i,5)*hprimewgll_xx(5,j)
+! enddo
+! enddo
+!
! sums contributions
DO_LOOP_IJK
@@ -675,9 +655,9 @@
fac1 = wgllwgll_yz_3D(INDEX_IJK)
fac2 = wgllwgll_xz_3D(INDEX_IJK)
fac3 = wgllwgll_xy_3D(INDEX_IJK)
- sum_terms(1,INDEX_IJK) = - (fac1*newtempx1(INDEX_IJK) + fac2*newtempx2(INDEX_IJK) + fac3*newtempx3(INDEX_IJK))
- sum_terms(2,INDEX_IJK) = - (fac1*newtempy1(INDEX_IJK) + fac2*newtempy2(INDEX_IJK) + fac3*newtempy3(INDEX_IJK))
- sum_terms(3,INDEX_IJK) = - (fac1*newtempz1(INDEX_IJK) + fac2*newtempz2(INDEX_IJK) + fac3*newtempz3(INDEX_IJK))
+ sum_terms(INDEX_IJK,1) = - (fac1*newtempx1(INDEX_IJK) + fac2*newtempx2(INDEX_IJK) + fac3*newtempx3(INDEX_IJK))
+ sum_terms(INDEX_IJK,2) = - (fac1*newtempy1(INDEX_IJK) + fac2*newtempy2(INDEX_IJK) + fac3*newtempy3(INDEX_IJK))
+ sum_terms(INDEX_IJK,3) = - (fac1*newtempz1(INDEX_IJK) + fac2*newtempz2(INDEX_IJK) + fac3*newtempz3(INDEX_IJK))
ENDDO_LOOP_IJK
@@ -692,9 +672,9 @@
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
- sum_terms(1,INDEX_IJK) = sum_terms(1,INDEX_IJK) + rho_s_H(1,INDEX_IJK)
- sum_terms(2,INDEX_IJK) = sum_terms(2,INDEX_IJK) + rho_s_H(2,INDEX_IJK)
- sum_terms(3,INDEX_IJK) = sum_terms(3,INDEX_IJK) + rho_s_H(3,INDEX_IJK)
+ sum_terms(INDEX_IJK,1) = sum_terms(INDEX_IJK,1) + rho_s_H(INDEX_IJK,1)
+ sum_terms(INDEX_IJK,2) = sum_terms(INDEX_IJK,2) + rho_s_H(INDEX_IJK,2)
+ sum_terms(INDEX_IJK,3) = sum_terms(INDEX_IJK,3) + rho_s_H(INDEX_IJK,3)
enddo
enddo
enddo
@@ -722,9 +702,9 @@
iglob = ibool(INDEX_IJK,ispec)
! do NOT use array syntax ":" for the three statements below
! otherwise most compilers will not be able to vectorize the outer loop
- accel_inner_core(1,iglob) = accel_inner_core(1,iglob) + sum_terms(1,INDEX_IJK)
- accel_inner_core(2,iglob) = accel_inner_core(2,iglob) + sum_terms(2,INDEX_IJK)
- accel_inner_core(3,iglob) = accel_inner_core(3,iglob) + sum_terms(3,INDEX_IJK)
+ accel_inner_core(1,iglob) = accel_inner_core(1,iglob) + sum_terms(INDEX_IJK,1)
+ accel_inner_core(2,iglob) = accel_inner_core(2,iglob) + sum_terms(INDEX_IJK,2)
+ accel_inner_core(3,iglob) = accel_inner_core(3,iglob) + sum_terms(INDEX_IJK,3)
#ifdef FORCE_VECTORIZATION
enddo
@@ -755,8 +735,6 @@
R_xx_lddrk,R_yy_lddrk,R_xy_lddrk,R_xz_lddrk,R_yz_lddrk, &
ATT1_VAL,ATT2_VAL,ATT3_VAL,vnspec,factor_common, &
muvstore, &
-! epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-! epsilondev_xz,epsilondev_yz, &
epsilondev_loc, &
deltat)
else
@@ -772,16 +750,165 @@
! save deviatoric strain for Runge-Kutta scheme
if(COMPUTE_AND_STORE_STRAIN) then
- epsilondev_xx(:,:,:,ispec) = epsilondev_loc(1,:,:,:)
- epsilondev_yy(:,:,:,ispec) = epsilondev_loc(2,:,:,:)
- epsilondev_xy(:,:,:,ispec) = epsilondev_loc(3,:,:,:)
- epsilondev_xz(:,:,:,ispec) = epsilondev_loc(4,:,:,:)
- epsilondev_yz(:,:,:,ispec) = epsilondev_loc(5,:,:,:)
+ epsilondev_xx(:,:,:,ispec) = epsilondev_loc(:,:,:,1)
+ epsilondev_yy(:,:,:,ispec) = epsilondev_loc(:,:,:,2)
+ epsilondev_xy(:,:,:,ispec) = epsilondev_loc(:,:,:,3)
+ epsilondev_xz(:,:,:,ispec) = epsilondev_loc(:,:,:,4)
+ epsilondev_yz(:,:,:,ispec) = epsilondev_loc(:,:,:,5)
endif
endif ! end of test to exclude fictitious elements in central cube
enddo ! of spectral element loop
+ contains
+
+!--------------------------------------------------------------------------------------------
+!
+! matrix-matrix multiplications
+!
+! subroutines adapted from Deville, Fischer and Mund, High-order methods
+! for incompressible fluid flow, Cambridge University Press (2002),
+! pages 386 and 389 and Figure 8.3.1
+!
+!--------------------------------------------------------------------------------------------
+!
+! note: the matrix-matrix multiplications are used for very small matrices ( 5 x 5 x 5 elements);
+! thus, calling external optimized libraries for these multiplications is in general slower
+!
+! please leave the routines here to help compilers inlining the code
+
+ subroutine mxm5_3comp_singleA(A,n1,B1,B2,B3,C1,C2,C3,n3)
+
+! 3 different arrays for x/y/z-components, 2-dimensional arrays (25,5)/(5,25), same A matrix for all 3 component arrays
+
+ use constants_solver,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n3
+ real(kind=CUSTOM_REAL),dimension(n1,5) :: A
+ real(kind=CUSTOM_REAL),dimension(5,n3) :: B1,B2,B3
+ real(kind=CUSTOM_REAL),dimension(n1,n3) :: C1,C2,C3
+
+ ! local parameters
+ integer :: i,j
+
+ ! matrix-matrix multiplication
+ do j=1,n3
+ do i=1,n1
+ C1(i,j) = A(i,1) * B1(1,j) &
+ + A(i,2) * B1(2,j) &
+ + A(i,3) * B1(3,j) &
+ + A(i,4) * B1(4,j) &
+ + A(i,5) * B1(5,j)
+
+ C2(i,j) = A(i,1) * B2(1,j) &
+ + A(i,2) * B2(2,j) &
+ + A(i,3) * B2(3,j) &
+ + A(i,4) * B2(4,j) &
+ + A(i,5) * B2(5,j)
+
+ C3(i,j) = A(i,1) * B3(1,j) &
+ + A(i,2) * B3(2,j) &
+ + A(i,3) * B3(3,j) &
+ + A(i,4) * B3(4,j) &
+ + A(i,5) * B3(5,j)
+ enddo
+ enddo
+
+ end subroutine mxm5_3comp_singleA
+
+
+!--------------------------------------------------------------------------------------------
+
+ subroutine mxm5_3comp_singleB(A1,A2,A3,n1,B,C1,C2,C3,n3)
+
+! 3 different arrays for x/y/z-components, 2-dimensional arrays (25,5)/(5,25), same B matrix for all 3 component arrays
+
+ use constants_solver,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n3
+ real(kind=CUSTOM_REAL),dimension(n1,5) :: A1,A2,A3
+ real(kind=CUSTOM_REAL),dimension(5,n3) :: B
+ real(kind=CUSTOM_REAL),dimension(n1,n3) :: C1,C2,C3
+
+ ! local parameters
+ integer :: i,j
+
+ ! matrix-matrix multiplication
+ do j=1,n3
+ do i=1,n1
+ C1(i,j) = A1(i,1) * B(1,j) &
+ + A1(i,2) * B(2,j) &
+ + A1(i,3) * B(3,j) &
+ + A1(i,4) * B(4,j) &
+ + A1(i,5) * B(5,j)
+
+ C2(i,j) = A2(i,1) * B(1,j) &
+ + A2(i,2) * B(2,j) &
+ + A2(i,3) * B(3,j) &
+ + A2(i,4) * B(4,j) &
+ + A2(i,5) * B(5,j)
+
+ C3(i,j) = A3(i,1) * B(1,j) &
+ + A3(i,2) * B(2,j) &
+ + A3(i,3) * B(3,j) &
+ + A3(i,4) * B(4,j) &
+ + A3(i,5) * B(5,j)
+ enddo
+ enddo
+
+ end subroutine mxm5_3comp_singleB
+
+
+!--------------------------------------------------------------------------------------------
+
+ subroutine mxm5_3comp_3dmat_singleB(A1,A2,A3,n1,B,n2,C1,C2,C3,n3)
+
+! 3 different arrays for x/y/z-components, 3-dimensional arrays (5,5,5), same B matrix for all 3 component arrays
+
+ use constants_solver,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n2,n3
+ real(kind=CUSTOM_REAL),dimension(n1,5,n3) :: A1,A2,A3
+ real(kind=CUSTOM_REAL),dimension(5,n2) :: B
+ real(kind=CUSTOM_REAL),dimension(n1,n2,n3) :: C1,C2,C3
+
+ ! local parameters
+ integer :: i,j,k
+
+ ! matrix-matrix multiplication
+ do j=1,n2
+ do i=1,n1
+ ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
+ do k=1,n3
+ C1(i,j,k) = A1(i,1,k) * B(1,j) &
+ + A1(i,2,k) * B(2,j) &
+ + A1(i,3,k) * B(3,j) &
+ + A1(i,4,k) * B(4,j) &
+ + A1(i,5,k) * B(5,j)
+
+ C2(i,j,k) = A2(i,1,k) * B(1,j) &
+ + A2(i,2,k) * B(2,j) &
+ + A2(i,3,k) * B(3,j) &
+ + A2(i,4,k) * B(4,j) &
+ + A2(i,5,k) * B(5,j)
+
+ C3(i,j,k) = A3(i,1,k) * B(1,j) &
+ + A3(i,2,k) * B(2,j) &
+ + A3(i,3,k) * B(3,j) &
+ + A3(i,4,k) * B(4,j) &
+ + A3(i,5,k) * B(5,j)
+ enddo
+ enddo
+ enddo
+
+ end subroutine mxm5_3comp_3dmat_singleB
+
end subroutine compute_forces_inner_core_Dev
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_noDev.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_noDev.f90 2013-11-01 21:27:52 UTC (rev 22976)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_noDev.f90 2013-11-04 08:16:42 UTC (rev 22977)
@@ -79,11 +79,12 @@
! variable sized array variables
integer :: vnspec
- real(kind=CUSTOM_REAL), dimension(N_SLS,ATT1_VAL,ATT2_VAL,ATT3_VAL,vnspec) :: factor_common
+ real(kind=CUSTOM_REAL), dimension(ATT1_VAL,ATT2_VAL,ATT3_VAL,N_SLS,vnspec) :: factor_common
real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval,betaval,gammaval
- real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_ATT) :: R_xx,R_yy,R_xy,R_xz,R_yz
- real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_ATT) :: &
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_ATT) :: R_xx,R_yy,R_xy,R_xz,R_yz
+
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_ATT) :: &
R_xx_lddrk,R_yy_lddrk,R_xy_lddrk,R_xz_lddrk,R_yz_lddrk
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC) :: &
@@ -94,7 +95,7 @@
logical :: phase_is_inner
! local parameters
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,5) :: epsilondev_loc
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
@@ -114,7 +115,7 @@
real(kind=CUSTOM_REAL) fac1,fac2,fac3
real(kind=CUSTOM_REAL) lambdal,mul,lambdalplus2mul
real(kind=CUSTOM_REAL) kappal
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sum_terms
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NDIM) :: sum_terms
real(kind=CUSTOM_REAL) minus_sum_beta
real(kind=CUSTOM_REAL) c11l,c33l,c12l,c13l,c44l
@@ -134,7 +135,7 @@
double precision cos_theta_sq,sin_theta_sq,cos_phi_sq,sin_phi_sq
double precision theta,phi,factor,gxl,gyl,gzl,sx_l,sy_l,sz_l
double precision Hxxl,Hyyl,Hzzl,Hxyl,Hxzl,Hyzl
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: rho_s_H
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NDIM) :: rho_s_H
real(kind=CUSTOM_REAL) sigma_yx,sigma_zx,sigma_zy
integer :: int_radius
@@ -250,11 +251,11 @@
else
epsilon_trace_over_3(i,j,k,ispec) = templ
endif
- epsilondev_loc(1,i,j,k) = duxdxl - templ
- epsilondev_loc(2,i,j,k) = duydyl - templ
- epsilondev_loc(3,i,j,k) = 0.5 * duxdyl_plus_duydxl
- epsilondev_loc(4,i,j,k) = 0.5 * duzdxl_plus_duxdzl
- epsilondev_loc(5,i,j,k) = 0.5 * duzdyl_plus_duydzl
+ epsilondev_loc(i,j,k,1) = duxdxl - templ
+ epsilondev_loc(i,j,k,2) = duydyl - templ
+ epsilondev_loc(i,j,k,3) = 0.5 * duxdyl_plus_duydxl
+ epsilondev_loc(i,j,k,4) = 0.5 * duzdxl_plus_duxdzl
+ epsilondev_loc(i,j,k,5) = 0.5 * duzdyl_plus_duydzl
endif
if(ANISOTROPIC_INNER_CORE_VAL) then
@@ -334,14 +335,14 @@
! subtract memory variables if attenuation
if( ATTENUATION_VAL .and. .not. PARTIAL_PHYS_DISPERSION_ONLY_VAL ) then
do i_SLS = 1,N_SLS
- R_xx_val = R_xx(i_SLS,i,j,k,ispec)
- R_yy_val = R_yy(i_SLS,i,j,k,ispec)
+ R_xx_val = R_xx(i,j,k,i_SLS,ispec)
+ R_yy_val = R_yy(i,j,k,i_SLS,ispec)
sigma_xx = sigma_xx - R_xx_val
sigma_yy = sigma_yy - R_yy_val
sigma_zz = sigma_zz + R_xx_val + R_yy_val
- sigma_xy = sigma_xy - R_xy(i_SLS,i,j,k,ispec)
- sigma_xz = sigma_xz - R_xz(i_SLS,i,j,k,ispec)
- sigma_yz = sigma_yz - R_yz(i_SLS,i,j,k,ispec)
+ sigma_xy = sigma_xy - R_xy(i,j,k,i_SLS,ispec)
+ sigma_xz = sigma_xz - R_xz(i,j,k,i_SLS,ispec)
+ sigma_yz = sigma_yz - R_yz(i,j,k,i_SLS,ispec)
enddo
endif
@@ -426,9 +427,9 @@
! precompute vector
factor = dble(jacobianl) * wgll_cube(i,j,k)
- rho_s_H(1,i,j,k) = sngl(factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl))
- rho_s_H(2,i,j,k) = sngl(factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl))
- rho_s_H(3,i,j,k) = sngl(factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl))
+ rho_s_H(i,j,k,1) = sngl(factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl))
+ rho_s_H(i,j,k,2) = sngl(factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl))
+ rho_s_H(i,j,k,3) = sngl(factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl))
else
@@ -453,9 +454,9 @@
! precompute vector
factor = jacobianl * wgll_cube(i,j,k)
- rho_s_H(1,i,j,k) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
- rho_s_H(2,i,j,k) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
- rho_s_H(3,i,j,k) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
+ rho_s_H(i,j,k,1) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
+ rho_s_H(i,j,k,2) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
+ rho_s_H(i,j,k,3) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
endif
@@ -519,12 +520,15 @@
fac2 = wgllwgll_xz(i,k)
fac3 = wgllwgll_xy(i,j)
- sum_terms(1,i,j,k) = - (fac1*tempx1l + fac2*tempx2l + fac3*tempx3l)
- sum_terms(2,i,j,k) = - (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l)
- sum_terms(3,i,j,k) = - (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l)
+ sum_terms(i,j,k,1) = - (fac1*tempx1l + fac2*tempx2l + fac3*tempx3l)
+ sum_terms(i,j,k,2) = - (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l)
+ sum_terms(i,j,k,3) = - (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l)
- if(GRAVITY_VAL) sum_terms(:,i,j,k) = sum_terms(:,i,j,k) + rho_s_H(:,i,j,k)
-
+ if(GRAVITY_VAL) then
+ sum_terms(i,j,k,1) = sum_terms(i,j,k,1) + rho_s_H(i,j,k,1)
+ sum_terms(i,j,k,2) = sum_terms(i,j,k,2) + rho_s_H(i,j,k,2)
+ sum_terms(i,j,k,3) = sum_terms(i,j,k,3) + rho_s_H(i,j,k,3)
+ endif
enddo
enddo
enddo
@@ -534,7 +538,9 @@
do j=1,NGLLY
do i=1,NGLLX
iglob = ibool(i,j,k,ispec)
- accel_inner_core(:,iglob) = accel_inner_core(:,iglob) + sum_terms(:,i,j,k)
+ accel_inner_core(1,iglob) = accel_inner_core(1,iglob) + sum_terms(i,j,k,1)
+ accel_inner_core(2,iglob) = accel_inner_core(2,iglob) + sum_terms(i,j,k,2)
+ accel_inner_core(3,iglob) = accel_inner_core(3,iglob) + sum_terms(i,j,k,3)
enddo
enddo
enddo
@@ -561,8 +567,6 @@
R_xx_lddrk,R_yy_lddrk,R_xy_lddrk,R_xz_lddrk,R_yz_lddrk, &
ATT1_VAL,ATT2_VAL,ATT3_VAL,vnspec,factor_common, &
muvstore, &
-! epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-! epsilondev_xz,epsilondev_yz, &
epsilondev_loc, &
deltat)
else
@@ -581,11 +585,11 @@
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
- epsilondev_xx(i,j,k,ispec) = epsilondev_loc(1,i,j,k)
- epsilondev_yy(i,j,k,ispec) = epsilondev_loc(2,i,j,k)
- epsilondev_xy(i,j,k,ispec) = epsilondev_loc(3,i,j,k)
- epsilondev_xz(i,j,k,ispec) = epsilondev_loc(4,i,j,k)
- epsilondev_yz(i,j,k,ispec) = epsilondev_loc(5,i,j,k)
+ epsilondev_xx(i,j,k,ispec) = epsilondev_loc(i,j,k,1)
+ epsilondev_yy(i,j,k,ispec) = epsilondev_loc(i,j,k,2)
+ epsilondev_xy(i,j,k,ispec) = epsilondev_loc(i,j,k,3)
+ epsilondev_xz(i,j,k,ispec) = epsilondev_loc(i,j,k,4)
+ epsilondev_yz(i,j,k,ispec) = epsilondev_loc(i,j,k,5)
enddo
enddo
enddo
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core_Dev.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core_Dev.F90 2013-11-01 21:27:52 UTC (rev 22976)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core_Dev.F90 2013-11-04 08:16:42 UTC (rev 22977)
@@ -83,8 +83,6 @@
logical :: phase_is_inner
! local parameters
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: tempx1,tempx2,tempx3
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: newtempx1,newtempx2,newtempx3
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: sum_terms
! for gravity
@@ -92,45 +90,34 @@
double precision :: radius,theta,phi,gxl,gyl,gzl
double precision :: cos_theta,sin_theta,cos_phi,sin_phi
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: gravity_term
+
+ double precision, dimension(NGLLX,NGLLY,NGLLZ) :: temp_gxl,temp_gyl,temp_gzl
+
! for the Euler scheme for rotation
real(kind=CUSTOM_REAL) :: two_omega_deltat,cos_two_omega_t,sin_two_omega_t,A_rotation,B_rotation, &
ux_rotation,uy_rotation,dpotentialdx_with_rot,dpotentialdy_with_rot
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: source_euler_A,source_euler_B
integer :: ispec,iglob
- integer :: i,j,k
real(kind=CUSTOM_REAL) :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
real(kind=CUSTOM_REAL) :: dpotentialdxl,dpotentialdyl,dpotentialdzl
- ! manually inline the calls to the Deville et al. (2002) routines
+ ! Deville
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: tempx1,tempx2,tempx3
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: newtempx1,newtempx2,newtempx3
- real(kind=CUSTOM_REAL), dimension(NGLLX,m2) :: B1_m1_m2_5points
- real(kind=CUSTOM_REAL), dimension(m1,m2) :: C1_m1_m2_5points
- real(kind=CUSTOM_REAL), dimension(m1,m2) :: E1_m1_m2_5points
- equivalence(dummyx_loc,B1_m1_m2_5points)
- equivalence(tempx1,C1_m1_m2_5points)
- equivalence(newtempx1,E1_m1_m2_5points)
-
- real(kind=CUSTOM_REAL), dimension(m2,NGLLX) :: A1_mxm_m2_m1_5points
- real(kind=CUSTOM_REAL), dimension(m2,m1) :: C1_mxm_m2_m1_5points
- real(kind=CUSTOM_REAL), dimension(m2,m1) :: E1_mxm_m2_m1_5points
-
- equivalence(dummyx_loc,A1_mxm_m2_m1_5points)
- equivalence(tempx3,C1_mxm_m2_m1_5points)
- equivalence(newtempx3,E1_mxm_m2_m1_5points)
-
- double precision, dimension(NGLLX,NGLLY,NGLLZ) :: temp_gxl,temp_gyl,temp_gzl
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
displ_times_grad_x_ln_rho,displ_times_grad_y_ln_rho,displ_times_grad_z_ln_rho
-! integer :: computed_elements
integer :: num_elements,ispec_p
integer :: iphase
#ifdef FORCE_VECTORIZATION
integer :: ijk
+#else
+ integer :: i,j,k
#endif
! ****************************************************
@@ -198,38 +185,14 @@
! subroutines adapted from Deville, Fischer and Mund, High-order methods
! for incompressible fluid flow, Cambridge University Press (2002),
! pages 386 and 389 and Figure 8.3.1
- do j=1,m2
- do i=1,m1
- C1_m1_m2_5points(i,j) = hprime_xx(i,1)*B1_m1_m2_5points(1,j) + &
- hprime_xx(i,2)*B1_m1_m2_5points(2,j) + &
- hprime_xx(i,3)*B1_m1_m2_5points(3,j) + &
- hprime_xx(i,4)*B1_m1_m2_5points(4,j) + &
- hprime_xx(i,5)*B1_m1_m2_5points(5,j)
- enddo
- enddo
- do k = 1,NGLLX
- do j=1,m1
- do i=1,m1
- tempx2(i,j,k) = dummyx_loc(i,1,k)*hprime_xxT(1,j) + &
- dummyx_loc(i,2,k)*hprime_xxT(2,j) + &
- dummyx_loc(i,3,k)*hprime_xxT(3,j) + &
- dummyx_loc(i,4,k)*hprime_xxT(4,j) + &
- dummyx_loc(i,5,k)*hprime_xxT(5,j)
- enddo
- enddo
- enddo
+ ! computes 1. matrix multiplication for tempx1,..
+ call mxm5_single(hprime_xx,m1,dummyx_loc,tempx1,m2)
+ ! computes 2. matrix multiplication for tempx2,..
+ call mxm5_3dmat_single(dummyx_loc,m1,hprime_xxT,m1,tempx2,NGLLX)
+ ! computes 3. matrix multiplication for tempx3,..
+ call mxm5_single(dummyx_loc,m2,hprime_xxT,tempx3,m1)
- do j=1,m1
- do i=1,m2
- C1_mxm_m2_m1_5points(i,j) = A1_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
- A1_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
- A1_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
- A1_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
- A1_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
- enddo
- enddo
-
DO_LOOP_IJK
! get derivatives of velocity potential with respect to x, y and z
@@ -373,38 +336,14 @@
! subroutines adapted from Deville, Fischer and Mund, High-order methods
! for incompressible fluid flow, Cambridge University Press (2002),
! pages 386 and 389 and Figure 8.3.1
- do j=1,m2
- do i=1,m1
- E1_m1_m2_5points(i,j) = hprimewgll_xxT(i,1)*C1_m1_m2_5points(1,j) + &
- hprimewgll_xxT(i,2)*C1_m1_m2_5points(2,j) + &
- hprimewgll_xxT(i,3)*C1_m1_m2_5points(3,j) + &
- hprimewgll_xxT(i,4)*C1_m1_m2_5points(4,j) + &
- hprimewgll_xxT(i,5)*C1_m1_m2_5points(5,j)
- enddo
- enddo
- do k = 1,NGLLX
- do j=1,m1
- do i=1,m1
- newtempx2(i,j,k) = tempx2(i,1,k)*hprimewgll_xx(1,j) + &
- tempx2(i,2,k)*hprimewgll_xx(2,j) + &
- tempx2(i,3,k)*hprimewgll_xx(3,j) + &
- tempx2(i,4,k)*hprimewgll_xx(4,j) + &
- tempx2(i,5,k)*hprimewgll_xx(5,j)
- enddo
- enddo
- enddo
+ ! computes 1. matrix multiplication for newtempx1,..
+ call mxm5_single(hprimewgll_xxT,m1,tempx1,newtempx1,m2)
+ ! computes 2. matrix multiplication for newtempx2,..
+ call mxm5_3dmat_single(tempx2,m1,hprimewgll_xx,m1,newtempx2,NGLLX)
+ ! computes 3. matrix multiplication for newtempx3,..
+ call mxm5_single(tempx3,m2,hprimewgll_xx,newtempx3,m1)
- do j=1,m1
- do i=1,m2
- E1_mxm_m2_m1_5points(i,j) = C1_mxm_m2_m1_5points(i,1)*hprimewgll_xx(1,j) + &
- C1_mxm_m2_m1_5points(i,2)*hprimewgll_xx(2,j) + &
- C1_mxm_m2_m1_5points(i,3)*hprimewgll_xx(3,j) + &
- C1_mxm_m2_m1_5points(i,4)*hprimewgll_xx(4,j) + &
- C1_mxm_m2_m1_5points(i,5)*hprimewgll_xx(5,j)
- enddo
- enddo
-
! sum contributions from each element to the global mesh and add gravity term
DO_LOOP_IJK
@@ -488,5 +427,86 @@
enddo ! spectral element loop
+ contains
+
+!--------------------------------------------------------------------------------------------
+!
+! matrix-matrix multiplications
+!
+! subroutines adapted from Deville, Fischer and Mund, High-order methods
+! for incompressible fluid flow, Cambridge University Press (2002),
+! pages 386 and 389 and Figure 8.3.1
+!
+!--------------------------------------------------------------------------------------------
+!
+! note: the matrix-matrix multiplications are used for very small matrices ( 5 x 5 x 5 elements);
+! thus, calling external optimized libraries for these multiplications is in general slower
+!
+! please leave the routines here to help compilers inlining the code
+
+ subroutine mxm5_single(A,n1,B,C,n3)
+
+! 2-dimensional arrays (25,5)/(5,25)
+
+ use constants_solver,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n3
+ real(kind=CUSTOM_REAL),dimension(n1,5) :: A
+ real(kind=CUSTOM_REAL),dimension(5,n3) :: B
+ real(kind=CUSTOM_REAL),dimension(n1,n3) :: C
+
+ ! local parameters
+ integer :: i,j
+
+ ! matrix-matrix multiplication
+ do j=1,n3
+ do i=1,n1
+ C(i,j) = A(i,1) * B(1,j) &
+ + A(i,2) * B(2,j) &
+ + A(i,3) * B(3,j) &
+ + A(i,4) * B(4,j) &
+ + A(i,5) * B(5,j)
+ enddo
+ enddo
+
+ end subroutine mxm5_single
+
+
+!--------------------------------------------------------------------------------------------
+
+ subroutine mxm5_3dmat_single(A,n1,B,n2,C,n3)
+
+! 3-dimensional arrays (5,5,5) for A and C
+
+ use constants_solver,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n2,n3
+ real(kind=CUSTOM_REAL),dimension(n1,5,n3) :: A
+ real(kind=CUSTOM_REAL),dimension(5,n2) :: B
+ real(kind=CUSTOM_REAL),dimension(n1,n2,n3) :: C
+
+ ! local parameters
+ integer :: i,j,k
+
+ ! matrix-matrix multiplication
+ do j=1,n2
+ do i=1,n1
+ ! for efficiency it is better to leave this loop on k inside; it leads to slightly faster code
+ do k=1,n3
+ C(i,j,k) = A(i,1,k) * B(1,j) &
+ + A(i,2,k) * B(2,j) &
+ + A(i,3,k) * B(3,j) &
+ + A(i,4,k) * B(4,j) &
+ + A(i,5,k) * B(5,j)
+ enddo
+ enddo
+ enddo
+
+ end subroutine mxm5_3dmat_single
+
end subroutine compute_forces_outer_core_Dev
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_kernels.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_kernels.F90 2013-11-01 21:27:52 UTC (rev 22976)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_kernels.F90 2013-11-04 08:16:42 UTC (rev 22977)
@@ -74,7 +74,7 @@
real(kind=CUSTOM_REAL), dimension(5) :: epsilondev_loc
real(kind=CUSTOM_REAL), dimension(5) :: b_epsilondev_loc
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: b_epsilondev_loc_matrix
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,5) :: b_epsilondev_loc_matrix
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: b_eps_trace_over_3_loc_matrix
integer :: i,j,k,ispec,iglob
@@ -108,11 +108,11 @@
else
! backward/reconstructed strain arrays
b_eps_trace_over_3_loc_matrix(:,:,:) = b_eps_trace_over_3_crust_mantle(:,:,:,ispec)
- b_epsilondev_loc_matrix(1,:,:,:) = b_epsilondev_xx_crust_mantle(:,:,:,ispec)
- b_epsilondev_loc_matrix(2,:,:,:) = b_epsilondev_yy_crust_mantle(:,:,:,ispec)
- b_epsilondev_loc_matrix(3,:,:,:) = b_epsilondev_xy_crust_mantle(:,:,:,ispec)
- b_epsilondev_loc_matrix(4,:,:,:) = b_epsilondev_xz_crust_mantle(:,:,:,ispec)
- b_epsilondev_loc_matrix(5,:,:,:) = b_epsilondev_yz_crust_mantle(:,:,:,ispec)
+ b_epsilondev_loc_matrix(:,:,:,1) = b_epsilondev_xx_crust_mantle(:,:,:,ispec)
+ b_epsilondev_loc_matrix(:,:,:,2) = b_epsilondev_yy_crust_mantle(:,:,:,ispec)
+ b_epsilondev_loc_matrix(:,:,:,3) = b_epsilondev_xy_crust_mantle(:,:,:,ispec)
+ b_epsilondev_loc_matrix(:,:,:,4) = b_epsilondev_xz_crust_mantle(:,:,:,ispec)
+ b_epsilondev_loc_matrix(:,:,:,5) = b_epsilondev_yz_crust_mantle(:,:,:,ispec)
endif
! For anisotropic kernels
@@ -138,8 +138,8 @@
! behave better for smoother wavefields, thus containing less numerical artefacts.
rho_kl_crust_mantle(i,j,k,ispec) = rho_kl_crust_mantle(i,j,k,ispec) &
+ deltat * (accel_crust_mantle(1,iglob) * b_displ_crust_mantle(1,iglob) &
- + accel_crust_mantle(2,iglob) * b_displ_crust_mantle(2,iglob) &
- + accel_crust_mantle(3,iglob) * b_displ_crust_mantle(3,iglob) )
+ + accel_crust_mantle(2,iglob) * b_displ_crust_mantle(2,iglob) &
+ + accel_crust_mantle(3,iglob) * b_displ_crust_mantle(3,iglob) )
! fully anisotropic kernel
! temporary arrays
@@ -149,7 +149,11 @@
epsilondev_loc(4) = epsilondev_xz_crust_mantle(i,j,k,ispec)
epsilondev_loc(5) = epsilondev_yz_crust_mantle(i,j,k,ispec)
- b_epsilondev_loc(:) = b_epsilondev_loc_matrix(:,i,j,k)
+ b_epsilondev_loc(1) = b_epsilondev_loc_matrix(i,j,k,1)
+ b_epsilondev_loc(2) = b_epsilondev_loc_matrix(i,j,k,2)
+ b_epsilondev_loc(3) = b_epsilondev_loc_matrix(i,j,k,3)
+ b_epsilondev_loc(4) = b_epsilondev_loc_matrix(i,j,k,4)
+ b_epsilondev_loc(5) = b_epsilondev_loc_matrix(i,j,k,5)
call compute_strain_product(prod,eps_trace_over_3_crust_mantle(i,j,k,ispec),epsilondev_loc, &
b_eps_trace_over_3_loc_matrix(i,j,k),b_epsilondev_loc)
@@ -183,8 +187,8 @@
! behave better for smoother wavefields, thus containing less numerical artefacts.
rho_kl_crust_mantle(i,j,k,ispec) = rho_kl_crust_mantle(i,j,k,ispec) &
+ deltat * (accel_crust_mantle(1,iglob) * b_displ_crust_mantle(1,iglob) &
- + accel_crust_mantle(2,iglob) * b_displ_crust_mantle(2,iglob) &
- + accel_crust_mantle(3,iglob) * b_displ_crust_mantle(3,iglob) )
+ + accel_crust_mantle(2,iglob) * b_displ_crust_mantle(2,iglob) &
+ + accel_crust_mantle(3,iglob) * b_displ_crust_mantle(3,iglob) )
! isotropic kernels
! temporary arrays
@@ -194,7 +198,11 @@
epsilondev_loc(4) = epsilondev_xz_crust_mantle(i,j,k,ispec)
epsilondev_loc(5) = epsilondev_yz_crust_mantle(i,j,k,ispec)
- b_epsilondev_loc(:) = b_epsilondev_loc_matrix(:,i,j,k)
+ b_epsilondev_loc(1) = b_epsilondev_loc_matrix(i,j,k,1)
+ b_epsilondev_loc(2) = b_epsilondev_loc_matrix(i,j,k,2)
+ b_epsilondev_loc(3) = b_epsilondev_loc_matrix(i,j,k,3)
+ b_epsilondev_loc(4) = b_epsilondev_loc_matrix(i,j,k,4)
+ b_epsilondev_loc(5) = b_epsilondev_loc_matrix(i,j,k,5)
! kernel for shear modulus, see e.g. Tromp et al. (2005), equation (17)
! note: multiplication with 2*mu(x) will be done after the time loop
@@ -205,7 +213,6 @@
+ 2 * (epsilondev_loc(3)*b_epsilondev_loc(3) + epsilondev_loc(4)*b_epsilondev_loc(4) + &
epsilondev_loc(5)*b_epsilondev_loc(5)) )
-
! kernel for bulk modulus, see e.g. Tromp et al. (2005), equation (18)
! note: multiplication with kappa(x) will be done after the time loop
alpha_kl_crust_mantle(i,j,k,ispec) = alpha_kl_crust_mantle(i,j,k,ispec) &
@@ -251,106 +258,153 @@
real(kind=CUSTOM_REAL) :: tempz1l,tempz2l,tempz3l
real(kind=CUSTOM_REAL), dimension(5) :: b_epsilondev_loc
real(kind=CUSTOM_REAL), dimension(5) :: epsilondev_loc
- real(kind=CUSTOM_REAL), dimension(3) :: vector_accel
real(kind=CUSTOM_REAL) :: div_displ,b_div_displ
+ real(kind=CUSTOM_REAL) :: gradx,grady,gradz
integer :: i,j,k,l,ispec,iglob
+ logical,dimension(NGLOB_OUTER_CORE) :: mask_ibool
! outer_core -- compute the actual displacement and acceleration (NDIM,NGLOBMAX_OUTER_CORE)
if( .not. GPU_MODE ) then
! on CPU
+
+ ! pre-calculates gradients in outer core on CPU
+ mask_ibool(:) = .false.
do ispec = 1, NSPEC_OUTER_CORE
do k = 1, NGLLZ
do j = 1, NGLLY
do i = 1, NGLLX
+
+ ! global index
iglob = ibool_outer_core(i,j,k,ispec)
- xixl = xix_outer_core(i,j,k,ispec)
- xiyl = xiy_outer_core(i,j,k,ispec)
- xizl = xiz_outer_core(i,j,k,ispec)
- etaxl = etax_outer_core(i,j,k,ispec)
- etayl = etay_outer_core(i,j,k,ispec)
- etazl = etaz_outer_core(i,j,k,ispec)
- gammaxl = gammax_outer_core(i,j,k,ispec)
- gammayl = gammay_outer_core(i,j,k,ispec)
- gammazl = gammaz_outer_core(i,j,k,ispec)
+ ! only calculate the gradients once for shared nodes
+ if( .not. mask_ibool(iglob) ) then
- ! calculates gradient grad(b_displ)
- tempx1l = 0._CUSTOM_REAL
- tempx2l = 0._CUSTOM_REAL
- tempx3l = 0._CUSTOM_REAL
- do l=1,NGLLX
- tempx1l = tempx1l + b_displ_outer_core(ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
- enddo
- do l=1,NGLLY
- tempx2l = tempx2l + b_displ_outer_core(ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
- enddo
- do l=1,NGLLZ
- tempx3l = tempx3l + b_displ_outer_core(ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
- enddo
- b_vector_displ_outer_core(1,iglob) = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
- b_vector_displ_outer_core(2,iglob) = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
- b_vector_displ_outer_core(3,iglob) = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
+ ! masks this global point
+ mask_ibool(iglob) = .true.
- ! calculates gradient grad(accel)
- tempx1l = 0._CUSTOM_REAL
- tempx2l = 0._CUSTOM_REAL
- tempx3l = 0._CUSTOM_REAL
- do l=1,NGLLX
- tempx1l = tempx1l + accel_outer_core(ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
- enddo
- do l=1,NGLLY
- tempx2l = tempx2l + accel_outer_core(ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
- enddo
- do l=1,NGLLZ
- tempx3l = tempx3l + accel_outer_core(ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
- enddo
- vector_accel(1) = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
- vector_accel(2) = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
- vector_accel(3) = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
+ xixl = xix_outer_core(i,j,k,ispec)
+ xiyl = xiy_outer_core(i,j,k,ispec)
+ xizl = xiz_outer_core(i,j,k,ispec)
+ etaxl = etax_outer_core(i,j,k,ispec)
+ etayl = etay_outer_core(i,j,k,ispec)
+ etazl = etaz_outer_core(i,j,k,ispec)
+ gammaxl = gammax_outer_core(i,j,k,ispec)
+ gammayl = gammay_outer_core(i,j,k,ispec)
+ gammazl = gammaz_outer_core(i,j,k,ispec)
- ! density kernel
-! rho_kl_outer_core(i,j,k,ispec) = rho_kl_outer_core(i,j,k,ispec) &
-! + deltat * dot_product(vector_accel(:), b_vector_displ_outer_core(:,iglob))
+ ! calculates gradient grad(b_displ)
+ tempx1l = 0._CUSTOM_REAL
+ tempx2l = 0._CUSTOM_REAL
+ tempx3l = 0._CUSTOM_REAL
+ do l=1,NGLLX
+ tempx1l = tempx1l + b_displ_outer_core(ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
+ enddo
+ do l=1,NGLLY
+ tempx2l = tempx2l + b_displ_outer_core(ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
+ enddo
+ do l=1,NGLLZ
+ tempx3l = tempx3l + b_displ_outer_core(ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
+ enddo
+ gradx = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
+ grady = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
+ gradz = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
-!! DK DK July 2013: replaces dot_product() with an unrolled expression, otherwise most compilers
-!! DK DK July 2013: will try to vectorize this rather than the outer loop, resulting in a much slower code
- rho_kl_outer_core(i,j,k,ispec) = rho_kl_outer_core(i,j,k,ispec) &
- + deltat * ( vector_accel(1) * b_vector_displ_outer_core(1,iglob) &
- + vector_accel(2) * b_vector_displ_outer_core(2,iglob) &
- + vector_accel(3) * b_vector_displ_outer_core(3,iglob) )
+ ! assigns gradient field on global points
+ b_vector_displ_outer_core(1,iglob) = gradx
+ b_vector_displ_outer_core(2,iglob) = grady
+ b_vector_displ_outer_core(3,iglob) = gradz
- ! bulk modulus kernel
- kappal = rhostore_outer_core(i,j,k,ispec)/kappavstore_outer_core(i,j,k,ispec)
-
- div_displ = kappal * accel_outer_core(iglob)
- b_div_displ = kappal * b_accel_outer_core(iglob)
-
- alpha_kl_outer_core(i,j,k,ispec) = alpha_kl_outer_core(i,j,k,ispec) &
- + deltat * div_displ * b_div_displ
-
- ! calculates gradient grad(displ) (also needed for boundary kernels)
- if(SAVE_BOUNDARY_MESH .or. deviatoric_outercore) then
+ ! calculates gradient grad(accel)
tempx1l = 0._CUSTOM_REAL
tempx2l = 0._CUSTOM_REAL
tempx3l = 0._CUSTOM_REAL
do l=1,NGLLX
- tempx1l = tempx1l + displ_outer_core(ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
+ tempx1l = tempx1l + accel_outer_core(ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
enddo
do l=1,NGLLY
- tempx2l = tempx2l + displ_outer_core(ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
+ tempx2l = tempx2l + accel_outer_core(ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
enddo
do l=1,NGLLZ
- tempx3l = tempx3l + displ_outer_core(ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
+ tempx3l = tempx3l + accel_outer_core(ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
enddo
- vector_displ_outer_core(1,iglob) = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
- vector_displ_outer_core(2,iglob) = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
- vector_displ_outer_core(3,iglob) = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
- endif
+ gradx = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
+ grady = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
+ gradz = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
- !deviatoric kernel check
- if( deviatoric_outercore ) then
+ ! assigns gradient field on global points
+ vector_accel_outer_core(1,iglob) = gradx
+ vector_accel_outer_core(2,iglob) = grady
+ vector_accel_outer_core(3,iglob) = gradz
+ ! calculates gradient grad(displ) (also needed for boundary kernels)
+ if( SAVE_BOUNDARY_MESH .or. deviatoric_outercore ) then
+ tempx1l = 0._CUSTOM_REAL
+ tempx2l = 0._CUSTOM_REAL
+ tempx3l = 0._CUSTOM_REAL
+ do l=1,NGLLX
+ tempx1l = tempx1l + displ_outer_core(ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
+ enddo
+ do l=1,NGLLY
+ tempx2l = tempx2l + displ_outer_core(ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
+ enddo
+ do l=1,NGLLZ
+ tempx3l = tempx3l + displ_outer_core(ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
+ enddo
+ gradx = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
+ grady = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
+ gradz = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
+
+ vector_displ_outer_core(1,iglob) = gradx
+ vector_displ_outer_core(2,iglob) = grady
+ vector_displ_outer_core(3,iglob) = gradz
+ endif
+
+ endif ! mask_ibool
+
+ enddo
+ enddo
+ enddo
+ enddo
+
+ ! acoustic kernels
+ do ispec = 1, NSPEC_OUTER_CORE
+ do k = 1, NGLLZ
+ do j = 1, NGLLY
+ do i = 1, NGLLX
+
+ ! global index
+ iglob = ibool_outer_core(i,j,k,ispec)
+
+ gradx = vector_accel_outer_core(1,iglob) * b_vector_displ_outer_core(1,iglob)
+ grady = vector_accel_outer_core(2,iglob) * b_vector_displ_outer_core(2,iglob)
+ gradz = vector_accel_outer_core(3,iglob) * b_vector_displ_outer_core(3,iglob)
+
+ ! density kernel
+ ! note: we replace dot_product() with an unrolled expression, otherwise most compilers
+ ! will try to vectorize this rather than the outer loop, resulting in a much slower code
+ rho_kl_outer_core(i,j,k,ispec) = rho_kl_outer_core(i,j,k,ispec) + deltat * (gradx + grady + gradz)
+
+ ! bulk modulus kernel
+ kappal = rhostore_outer_core(i,j,k,ispec)/kappavstore_outer_core(i,j,k,ispec)
+ div_displ = kappal * accel_outer_core(iglob)
+ b_div_displ = kappal * b_accel_outer_core(iglob)
+
+ alpha_kl_outer_core(i,j,k,ispec) = alpha_kl_outer_core(i,j,k,ispec) + deltat * div_displ * b_div_displ
+
+ enddo
+ enddo
+ enddo
+ enddo
+
+ !deviatoric kernel check
+ if( deviatoric_outercore ) then
+
+ do ispec = 1, NSPEC_OUTER_CORE
+ do k = 1, NGLLZ
+ do j = 1, NGLLY
+ do i = 1, NGLLX
tempx1l = 0._CUSTOM_REAL
tempx2l = 0._CUSTOM_REAL
tempx3l = 0._CUSTOM_REAL
@@ -469,13 +523,13 @@
+ 2 * (epsilondev_loc(3)*b_epsilondev_loc(3) + epsilondev_loc(4)*b_epsilondev_loc(4) + &
epsilondev_loc(5)*b_epsilondev_loc(5)) )
- endif !deviatoric kernel check
-
+ enddo
enddo
enddo
enddo
- enddo
+ endif !deviatoric kernel check
+
else
! updates kernel contribution on GPU
if( deviatoric_outercore ) call exit_mpi(myrank,'deviatoric kernel on GPU not supported yet')
@@ -506,7 +560,7 @@
real(kind=CUSTOM_REAL), dimension(5) :: b_epsilondev_loc
real(kind=CUSTOM_REAL), dimension(5) :: epsilondev_loc
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: b_epsilondev_loc_matrix
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,5) :: b_epsilondev_loc_matrix
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: b_eps_trace_over_3_loc_matrix
integer :: i,j,k,ispec,iglob
@@ -538,11 +592,11 @@
else
! backward/reconstructed strain arrays
b_eps_trace_over_3_loc_matrix(:,:,:) = b_eps_trace_over_3_inner_core(:,:,:,ispec)
- b_epsilondev_loc_matrix(1,:,:,:) = b_epsilondev_xx_inner_core(:,:,:,ispec)
- b_epsilondev_loc_matrix(2,:,:,:) = b_epsilondev_yy_inner_core(:,:,:,ispec)
- b_epsilondev_loc_matrix(3,:,:,:) = b_epsilondev_xy_inner_core(:,:,:,ispec)
- b_epsilondev_loc_matrix(4,:,:,:) = b_epsilondev_xz_inner_core(:,:,:,ispec)
- b_epsilondev_loc_matrix(5,:,:,:) = b_epsilondev_yz_inner_core(:,:,:,ispec)
+ b_epsilondev_loc_matrix(:,:,:,1) = b_epsilondev_xx_inner_core(:,:,:,ispec)
+ b_epsilondev_loc_matrix(:,:,:,2) = b_epsilondev_yy_inner_core(:,:,:,ispec)
+ b_epsilondev_loc_matrix(:,:,:,3) = b_epsilondev_xy_inner_core(:,:,:,ispec)
+ b_epsilondev_loc_matrix(:,:,:,4) = b_epsilondev_xz_inner_core(:,:,:,ispec)
+ b_epsilondev_loc_matrix(:,:,:,5) = b_epsilondev_yz_inner_core(:,:,:,ispec)
endif
do k = 1, NGLLZ
@@ -561,7 +615,11 @@
epsilondev_loc(4) = epsilondev_xz_inner_core(i,j,k,ispec)
epsilondev_loc(5) = epsilondev_yz_inner_core(i,j,k,ispec)
- b_epsilondev_loc(:) = b_epsilondev_loc_matrix(:,i,j,k)
+ b_epsilondev_loc(1) = b_epsilondev_loc_matrix(i,j,k,1)
+ b_epsilondev_loc(2) = b_epsilondev_loc_matrix(i,j,k,2)
+ b_epsilondev_loc(3) = b_epsilondev_loc_matrix(i,j,k,3)
+ b_epsilondev_loc(4) = b_epsilondev_loc_matrix(i,j,k,4)
+ b_epsilondev_loc(5) = b_epsilondev_loc_matrix(i,j,k,5)
beta_kl_inner_core(i,j,k,ispec) = beta_kl_inner_core(i,j,k,ispec) &
+ deltat * (epsilondev_loc(1)*b_epsilondev_loc(1) + epsilondev_loc(2)*b_epsilondev_loc(2) &
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_seismograms.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_seismograms.f90 2013-11-01 21:27:52 UTC (rev 22976)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_seismograms.f90 2013-11-04 08:16:42 UTC (rev 22977)
@@ -167,7 +167,7 @@
! element strain
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: eps_trace_over_3_loc_crust_mantle
- real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc_crust_mantle
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,5) :: epsilondev_loc_crust_mantle
do irec_local = 1,nrec_local
@@ -201,11 +201,11 @@
else
! element adjoint strain
eps_trace_over_3_loc_crust_mantle(:,:,:) = eps_trace_over_3_crust_mantle(:,:,:,ispec)
- epsilondev_loc_crust_mantle(1,:,:,:) = epsilondev_xx_crust_mantle(:,:,:,ispec)
- epsilondev_loc_crust_mantle(2,:,:,:) = epsilondev_yy_crust_mantle(:,:,:,ispec)
- epsilondev_loc_crust_mantle(3,:,:,:) = epsilondev_xy_crust_mantle(:,:,:,ispec)
- epsilondev_loc_crust_mantle(4,:,:,:) = epsilondev_xz_crust_mantle(:,:,:,ispec)
- epsilondev_loc_crust_mantle(5,:,:,:) = epsilondev_yz_crust_mantle(:,:,:,ispec)
+ epsilondev_loc_crust_mantle(:,:,:,1) = epsilondev_xx_crust_mantle(:,:,:,ispec)
+ epsilondev_loc_crust_mantle(:,:,:,2) = epsilondev_yy_crust_mantle(:,:,:,ispec)
+ epsilondev_loc_crust_mantle(:,:,:,3) = epsilondev_xy_crust_mantle(:,:,:,ispec)
+ epsilondev_loc_crust_mantle(:,:,:,4) = epsilondev_xz_crust_mantle(:,:,:,ispec)
+ epsilondev_loc_crust_mantle(:,:,:,5) = epsilondev_yz_crust_mantle(:,:,:,ispec)
endif
! perform the general interpolation using Lagrange polynomials
@@ -223,11 +223,11 @@
eps_trace = eps_trace + dble(eps_trace_over_3_loc_crust_mantle(i,j,k))*hlagrange
- dxx = dxx + dble(epsilondev_loc_crust_mantle(1,i,j,k))*hlagrange
- dyy = dyy + dble(epsilondev_loc_crust_mantle(2,i,j,k))*hlagrange
- dxy = dxy + dble(epsilondev_loc_crust_mantle(3,i,j,k))*hlagrange
- dxz = dxz + dble(epsilondev_loc_crust_mantle(4,i,j,k))*hlagrange
- dyz = dyz + dble(epsilondev_loc_crust_mantle(5,i,j,k))*hlagrange
+ dxx = dxx + dble(epsilondev_loc_crust_mantle(i,j,k,1))*hlagrange
+ dyy = dyy + dble(epsilondev_loc_crust_mantle(i,j,k,2))*hlagrange
+ dxy = dxy + dble(epsilondev_loc_crust_mantle(i,j,k,3))*hlagrange
+ dxz = dxz + dble(epsilondev_loc_crust_mantle(i,j,k,4))*hlagrange
+ dyz = dyz + dble(epsilondev_loc_crust_mantle(i,j,k,5))*hlagrange
displ_s(:,i,j,k) = displ_crust_mantle(:,iglob)
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/get_attenuation.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/get_attenuation.f90 2013-11-01 21:27:52 UTC (rev 22976)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/get_attenuation.f90 2013-11-04 08:16:42 UTC (rev 22977)
@@ -43,14 +43,14 @@
! this is better, it works fine and these arrays are really huge
! in the crust_mantle region, thus let us not double their size
real(kind=CUSTOM_REAL), dimension(ATT1_VAL,ATT2_VAL,ATT3_VAL,vnspec),intent(out) :: one_minus_sum_beta, scale_factor
- real(kind=CUSTOM_REAL), dimension(N_SLS,ATT1_VAL,ATT2_VAL,ATT3_VAL,vnspec),intent(out) :: factor_common
+ real(kind=CUSTOM_REAL), dimension(ATT1_VAL,ATT2_VAL,ATT3_VAL,N_SLS,vnspec),intent(out) :: factor_common
double precision, dimension(N_SLS),intent(out) :: tau_s
integer :: iregion_code
! local parameters
- integer :: i,j,k,ispec,ier
+ integer :: i,j,k,ispec,ier,i_sls
double precision, dimension(N_SLS) :: tau_e, fc
double precision :: omsb, Q_mu, sf, T_c_source, scale_t
character(len=150) :: prname
@@ -85,21 +85,32 @@
T_c_source = 1000.0d0 / T_c_source
T_c_source = T_c_source / scale_t
+ ! loops over elements
do ispec = 1, vnspec
+
+ ! loops over GLL points
do k = 1, ATT3_VAL
do j = 1, ATT2_VAL
do i = 1, ATT1_VAL
- tau_e(:) = factor_common(:,i,j,k,ispec)
+
+ ! gets relaxation times for each standard linear solid
+ do i_sls = 1,N_SLS
+ tau_e(i_sls) = factor_common(i,j,k,i_sls,ispec)
+ enddo
Q_mu = scale_factor(i,j,k,ispec)
! Determine the factor_common and one_minus_sum_beta from tau_s and tau_e
call get_attenuation_property_values(tau_s, tau_e, fc, omsb)
if( CUSTOM_REAL == SIZE_REAL ) then
- factor_common(:,i,j,k,ispec) = sngl(fc(:))
+ do i_sls = 1,N_SLS
+ factor_common(i,j,k,i_sls,ispec) = sngl(fc(i_sls))
+ enddo
one_minus_sum_beta(i,j,k,ispec) = sngl(omsb)
else
- factor_common(:,i,j,k,ispec) = fc(:)
+ do i_sls = 1,N_SLS
+ factor_common(i,j,k,i_sls,ispec) = fc(i_sls)
+ enddo
one_minus_sum_beta(i,j,k,ispec) = omsb
endif
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/prepare_timerun.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/prepare_timerun.f90 2013-11-01 21:27:52 UTC (rev 22976)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/prepare_timerun.f90 2013-11-04 08:16:42 UTC (rev 22977)
@@ -1345,19 +1345,19 @@
! attenuation memory variables
! crust/mantle
- allocate(R_xx_crust_mantle_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUATION), &
- R_yy_crust_mantle_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUATION), &
- R_xy_crust_mantle_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUATION), &
- R_xz_crust_mantle_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUATION), &
- R_yz_crust_mantle_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUATION), &
+ allocate(R_xx_crust_mantle_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_CRUST_MANTLE_ATTENUATION), &
+ R_yy_crust_mantle_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_CRUST_MANTLE_ATTENUATION), &
+ R_xy_crust_mantle_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_CRUST_MANTLE_ATTENUATION), &
+ R_xz_crust_mantle_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_CRUST_MANTLE_ATTENUATION), &
+ R_yz_crust_mantle_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_CRUST_MANTLE_ATTENUATION), &
stat=ier)
if(ier /= 0) stop 'error: not enough memory to allocate array R_memory_crust_mantle_lddrk'
! inner core
- allocate(R_xx_inner_core_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ATTENUATION), &
- R_yy_inner_core_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ATTENUATION), &
- R_xy_inner_core_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ATTENUATION), &
- R_xz_inner_core_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ATTENUATION), &
- R_yz_inner_core_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ATTENUATION), &
+ allocate(R_xx_inner_core_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_INNER_CORE_ATTENUATION), &
+ R_yy_inner_core_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_INNER_CORE_ATTENUATION), &
+ R_xy_inner_core_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_INNER_CORE_ATTENUATION), &
+ R_xz_inner_core_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_INNER_CORE_ATTENUATION), &
+ R_yz_inner_core_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_INNER_CORE_ATTENUATION), &
stat=ier)
if(ier /= 0) stop 'error: not enough memory to allocate array R_memory_inner_core_lddrk'
if(ATTENUATION_VAL) then
@@ -1375,19 +1375,19 @@
endif
if( SIMULATION_TYPE == 3 ) then
! crust/mantle
- allocate(b_R_xx_crust_mantle_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_AND_ATT), &
- b_R_yy_crust_mantle_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_AND_ATT), &
- b_R_xy_crust_mantle_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_AND_ATT), &
- b_R_xz_crust_mantle_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_AND_ATT), &
- b_R_yz_crust_mantle_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_AND_ATT), &
+ allocate(b_R_xx_crust_mantle_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_CRUST_MANTLE_STR_AND_ATT), &
+ b_R_yy_crust_mantle_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_CRUST_MANTLE_STR_AND_ATT), &
+ b_R_xy_crust_mantle_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_CRUST_MANTLE_STR_AND_ATT), &
+ b_R_xz_crust_mantle_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_CRUST_MANTLE_STR_AND_ATT), &
+ b_R_yz_crust_mantle_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_CRUST_MANTLE_STR_AND_ATT), &
stat=ier)
if(ier /= 0) stop 'error: not enough memory to allocate array R_memory_crust_mantle_lddrk'
! inner core
- allocate(b_R_xx_inner_core_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_AND_ATT), &
- b_R_yy_inner_core_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_AND_ATT), &
- b_R_xy_inner_core_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_AND_ATT), &
- b_R_xz_inner_core_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_AND_ATT), &
- b_R_yz_inner_core_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_AND_ATT), &
+ allocate(b_R_xx_inner_core_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_INNER_CORE_STR_AND_ATT), &
+ b_R_yy_inner_core_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_INNER_CORE_STR_AND_ATT), &
+ b_R_xy_inner_core_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_INNER_CORE_STR_AND_ATT), &
+ b_R_xz_inner_core_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_INNER_CORE_STR_AND_ATT), &
+ b_R_yz_inner_core_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_INNER_CORE_STR_AND_ATT), &
stat=ier)
if(ier /= 0) stop 'error: not enough memory to allocate array R_memory_inner_core_lddrk'
if(ATTENUATION_VAL) then
@@ -1417,18 +1417,18 @@
if(ier /= 0) stop 'error: not enough memory to allocate array A_array_rotation_lddrk'
allocate(B_array_rotation_lddrk(NGLLX,NGLLY,NGLLZ,1),stat=ier)
if(ier /= 0) stop 'error: not enough memory to allocate array B_array_rotation_lddrk'
- allocate(R_xx_crust_mantle_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,1), &
- R_yy_crust_mantle_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,1), &
- R_xy_crust_mantle_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,1), &
- R_xz_crust_mantle_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,1), &
- R_yz_crust_mantle_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,1), &
+ allocate(R_xx_crust_mantle_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,1), &
+ R_yy_crust_mantle_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,1), &
+ R_xy_crust_mantle_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,1), &
+ R_xz_crust_mantle_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,1), &
+ R_yz_crust_mantle_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,1), &
stat=ier)
if(ier /= 0) stop 'error: not enough memory to allocate array R_memory_crust_mantle_lddrk'
- allocate(R_xx_inner_core_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,1), &
- R_yy_inner_core_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,1), &
- R_xy_inner_core_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,1), &
- R_xz_inner_core_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,1), &
- R_yz_inner_core_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,1), &
+ allocate(R_xx_inner_core_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,1), &
+ R_yy_inner_core_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,1), &
+ R_xy_inner_core_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,1), &
+ R_xz_inner_core_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,1), &
+ R_yz_inner_core_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,1), &
stat=ier)
if(ier /= 0) stop 'error: not enough memory to allocate array R_memory_inner_core_lddrk'
if( SIMULATION_TYPE == 3 ) then
@@ -1436,18 +1436,18 @@
if(ier /= 0) stop 'error: not enough memory to allocate array b_A_array_rotation_lddrk'
allocate(b_B_array_rotation_lddrk(NGLLX,NGLLY,NGLLZ,1),stat=ier)
if(ier /= 0) stop 'error: not enough memory to allocate array b_B_array_rotation_lddrk'
- allocate(b_R_xx_crust_mantle_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,1), &
- b_R_yy_crust_mantle_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,1), &
- b_R_xy_crust_mantle_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,1), &
- b_R_xz_crust_mantle_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,1), &
- b_R_yz_crust_mantle_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,1), &
+ allocate(b_R_xx_crust_mantle_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,1), &
+ b_R_yy_crust_mantle_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,1), &
+ b_R_xy_crust_mantle_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,1), &
+ b_R_xz_crust_mantle_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,1), &
+ b_R_yz_crust_mantle_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,1), &
stat=ier)
if(ier /= 0) stop 'error: not enough memory to allocate array b_R_memory_crust_mantle_lddrk'
- allocate(b_R_xx_inner_core_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,1), &
- b_R_yy_inner_core_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,1), &
- b_R_xy_inner_core_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,1), &
- b_R_xz_inner_core_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,1), &
- b_R_yz_inner_core_lddrk(N_SLS,NGLLX,NGLLY,NGLLZ,1), &
+ allocate(b_R_xx_inner_core_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,1), &
+ b_R_yy_inner_core_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,1), &
+ b_R_xy_inner_core_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,1), &
+ b_R_xz_inner_core_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,1), &
+ b_R_yz_inner_core_lddrk(NGLLX,NGLLY,NGLLZ,N_SLS,1), &
stat=ier)
if(ier /= 0) stop 'error: not enough memory to allocate array b_R_memory_inner_core_lddrk'
endif
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/read_attenuation_adios.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/read_attenuation_adios.f90 2013-11-01 21:27:52 UTC (rev 22976)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/read_attenuation_adios.f90 2013-11-04 08:16:42 UTC (rev 22977)
@@ -43,7 +43,7 @@
integer :: vnspec
real(kind=CUSTOM_REAL), dimension(ATT1_VAL,ATT2_VAL,ATT3_VAL,vnspec) :: scale_factor
- real(kind=CUSTOM_REAL), dimension(N_SLS,ATT1_VAL,ATT2_VAL,ATT3_VAL,vnspec) :: factor_common
+ real(kind=CUSTOM_REAL), dimension(ATT1_VAL,ATT2_VAL,ATT3_VAL,N_SLS,vnspec) :: factor_common
double precision, dimension(N_SLS) :: tau_s
integer :: iregion_code
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/specfem3D.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/specfem3D.F90 2013-11-01 21:27:52 UTC (rev 22976)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/specfem3D.F90 2013-11-04 08:16:42 UTC (rev 22977)
@@ -408,10 +408,14 @@
! thus it has longer latency to access variables than stack memory variables.
!
! however, declaring the static arrays needed in compute_forces_crust_mantle_Dev()
-! like e.g. sum_terms, tempx1,...B1_m1_m2_5points,... in this main routine and
+! like e.g. sum_terms, tempx1,... in this main routine and
! passing them along as arguments to the routine makes the code slower.
-! it seems that this stack/heap criterion is more complicated.
+! it seems that this stack/heap criterion is more complicated,
+! and inlining functions is a performance criterion as well.
!
+! for vectorization, we assume that arrays have contiguous memory allocated. this holds true for most compilers and
+! static memory allocation. however, note that dynamically allocated memory could in principle be non-contiguous.
+!
! another reason why the use of modules is restricted is to make the code thread safe.
! having different threads access the same data structure and modifying it at the same time
! would lead to problems. passing arguments is a way to avoid such complications.
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/specfem3D_par.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/specfem3D_par.F90 2013-11-01 21:27:52 UTC (rev 22976)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/specfem3D_par.F90 2013-11-04 08:16:42 UTC (rev 22977)
@@ -365,9 +365,10 @@
! memory variables and standard linear solids for attenuation
real(kind=CUSTOM_REAL), dimension(ATT1_VAL,ATT2_VAL,ATT3_VAL,ATT4_VAL) :: &
one_minus_sum_beta_crust_mantle, factor_scale_crust_mantle
- real(kind=CUSTOM_REAL), dimension(N_SLS,ATT1_VAL,ATT2_VAL,ATT3_VAL,ATT4_VAL) :: &
+ real(kind=CUSTOM_REAL), dimension(ATT1_VAL,ATT2_VAL,ATT3_VAL,N_SLS,ATT4_VAL) :: &
factor_common_crust_mantle
- real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUATION) :: &
+
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_CRUST_MANTLE_ATTENUATION) :: &
R_xx_crust_mantle,R_yy_crust_mantle,R_xy_crust_mantle,R_xz_crust_mantle,R_yz_crust_mantle
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_OR_ATT),target :: &
@@ -378,7 +379,7 @@
eps_trace_over_3_crust_mantle
! ADJOINT
- real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_AND_ATT) :: &
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_CRUST_MANTLE_STR_AND_ATT) :: &
b_R_xx_crust_mantle,b_R_yy_crust_mantle,b_R_xy_crust_mantle,b_R_xz_crust_mantle,b_R_yz_crust_mantle
real(kind=CUSTOM_REAL), dimension(:,:,:,:),pointer :: &
@@ -537,9 +538,10 @@
! memory variables and standard linear solids for attenuation
real(kind=CUSTOM_REAL), dimension(ATT1_VAL,ATT2_VAL,ATT3_VAL,ATT5_VAL) :: &
one_minus_sum_beta_inner_core, factor_scale_inner_core
- real(kind=CUSTOM_REAL), dimension(N_SLS,ATT1_VAL,ATT2_VAL,ATT3_VAL,ATT5_VAL) :: &
+ real(kind=CUSTOM_REAL), dimension(ATT1_VAL,ATT2_VAL,ATT3_VAL,N_SLS,ATT5_VAL) :: &
factor_common_inner_core
- real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ATTENUATION) :: &
+
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_INNER_CORE_ATTENUATION) :: &
R_xx_inner_core,R_yy_inner_core,R_xy_inner_core,R_xz_inner_core,R_yz_inner_core
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_OR_ATT),target :: &
@@ -550,7 +552,7 @@
eps_trace_over_3_inner_core
! ADJOINT
- real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_AND_ATT) :: &
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_INNER_CORE_STR_AND_ATT) :: &
b_R_xx_inner_core,b_R_yy_inner_core,b_R_xy_inner_core,b_R_xz_inner_core,b_R_yz_inner_core
real(kind=CUSTOM_REAL), dimension(:,:,:,:),pointer :: &
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/update_displacement_LDDRK.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/update_displacement_LDDRK.f90 2013-11-01 21:27:52 UTC (rev 22976)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/update_displacement_LDDRK.f90 2013-11-04 08:16:42 UTC (rev 22977)
@@ -89,20 +89,18 @@
use specfem_par_outercore
implicit none
-
! local parameters
- integer :: i
+ real(kind=CUSTOM_REAL) :: alpha,beta
- do i=1,NGLOB_OUTER_CORE
- veloc_outer_core_lddrk(i) = ALPHA_LDDRK(istage) * veloc_outer_core_lddrk(i) + deltat * accel_outer_core(i)
+ ! current runge-kutta coefficients
+ alpha = ALPHA_LDDRK(istage)
+ beta = BETA_LDDRK(istage)
- displ_outer_core_lddrk(i) = ALPHA_LDDRK(istage) * displ_outer_core_lddrk(i) + deltat * veloc_outer_core(i)
+ ! forward wavefields
+ call update_acoustic_lddrk(NGLOB_OUTER_CORE,displ_outer_core,veloc_outer_core,accel_outer_core, &
+ displ_outer_core_lddrk,veloc_outer_core_lddrk, &
+ deltat,alpha,beta)
- veloc_outer_core(i) = veloc_outer_core(i) + BETA_LDDRK(istage) * veloc_outer_core_lddrk(i)
-
- displ_outer_core(i) = displ_outer_core(i) + BETA_LDDRK(istage) * displ_outer_core_lddrk(i)
- enddo
-
end subroutine update_veloc_acoustic_lddrk
!
@@ -117,22 +115,63 @@
use specfem_par_outercore
implicit none
+ ! local parameters
+ real(kind=CUSTOM_REAL) :: alpha,beta
+ ! current runge-kutta coefficients
+ alpha = ALPHA_LDDRK(istage)
+ beta = BETA_LDDRK(istage)
+
+ ! backward/reconstructed wavefields
+ call update_acoustic_lddrk(NGLOB_OUTER_CORE_ADJOINT,b_displ_outer_core,b_veloc_outer_core,b_accel_outer_core, &
+ b_displ_outer_core_lddrk,b_veloc_outer_core_lddrk, &
+ b_deltat,alpha,beta)
+
+ end subroutine update_veloc_acoustic_lddrk_backward
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine update_acoustic_lddrk(NGLOB,displ,veloc,accel,displ_lddrk,veloc_lddrk,deltat,alpha,beta)
+
+! updates acceleration and velocity in outer core
+
+ use constants,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: NGLOB
+ real(kind=CUSTOM_REAL),dimension(NGLOB),intent(inout) :: displ,veloc,accel
+ real(kind=CUSTOM_REAL),dimension(NGLOB),intent(inout) :: displ_lddrk,veloc_lddrk
+
+ real(kind=CUSTOM_REAL),intent(in) :: deltat
+
+ ! runge-kutta coefficients
+ real(kind=CUSTOM_REAL),intent(in) :: alpha,beta
+
! local parameters
integer :: i
- do i=1,NGLOB_OUTER_CORE
- b_veloc_outer_core_lddrk(i) = ALPHA_LDDRK(istage) * b_veloc_outer_core_lddrk(i) + b_deltat * b_accel_outer_core(i)
+ ! runge-kutta scheme update
- b_displ_outer_core_lddrk(i) = ALPHA_LDDRK(istage) * b_displ_outer_core_lddrk(i) + b_deltat * b_veloc_outer_core(i)
+ ! note: splitting the do-loops seems to be slightly more effective
- b_veloc_outer_core(i) = b_veloc_outer_core(i) + BETA_LDDRK(istage) * b_veloc_outer_core_lddrk(i)
-
- b_displ_outer_core(i) = b_displ_outer_core(i) + BETA_LDDRK(istage) * b_displ_outer_core_lddrk(i)
+ ! low-memory runge-kutta: intermediate storage wavefields
+ do i=1,NGLOB
+ veloc_lddrk(i) = alpha * veloc_lddrk(i) + deltat * accel(i)
+ displ_lddrk(i) = alpha * displ_lddrk(i) + deltat * veloc(i)
enddo
+ ! updates wavefields
+ do i=1,NGLOB
+ veloc(i) = veloc(i) + beta * veloc_lddrk(i)
+ displ(i) = displ(i) + beta * displ_lddrk(i)
+ enddo
- end subroutine update_veloc_acoustic_lddrk_backward
+ end subroutine update_acoustic_lddrk
+
!
!-------------------------------------------------------------------------------------------------
!
@@ -150,36 +189,25 @@
use specfem_par_innercore
implicit none
-
! local parameters
- integer :: i
+ real(kind=CUSTOM_REAL) :: alpha,beta
+ ! current runge-kutta coefficients
+ alpha = ALPHA_LDDRK(istage)
+ beta = BETA_LDDRK(istage)
+
+ ! forward wavefields
! crust/mantle
- do i=1,NGLOB_CRUST_MANTLE
- veloc_crust_mantle_lddrk(:,i) = ALPHA_LDDRK(istage) * veloc_crust_mantle_lddrk(:,i) &
- + deltat * accel_crust_mantle(:,i)
+ call update_elastic_lddrk(NGLOB_CRUST_MANTLE,displ_crust_mantle,veloc_crust_mantle,accel_crust_mantle, &
+ displ_crust_mantle_lddrk,veloc_crust_mantle_lddrk, &
+ deltat,alpha,beta)
- displ_crust_mantle_lddrk(:,i) = ALPHA_LDDRK(istage) * displ_crust_mantle_lddrk(:,i) &
- + deltat * veloc_crust_mantle(:,i)
-
- veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) + BETA_LDDRK(istage) * veloc_crust_mantle_lddrk(:,i)
-
- displ_crust_mantle(:,i) = displ_crust_mantle(:,i) + BETA_LDDRK(istage) * displ_crust_mantle_lddrk(:,i)
- enddo
-
! inner core
- do i=1,NGLOB_INNER_CORE
- veloc_inner_core_lddrk(:,i) = ALPHA_LDDRK(istage) * veloc_inner_core_lddrk(:,i) &
- + deltat * accel_inner_core(:,i)
+ call update_elastic_lddrk(NGLOB_INNER_CORE,displ_inner_core,veloc_inner_core,accel_inner_core, &
+ displ_inner_core_lddrk,veloc_inner_core_lddrk, &
+ deltat,alpha,beta)
- displ_inner_core_lddrk(:,i) = ALPHA_LDDRK(istage) * displ_inner_core_lddrk(:,i) &
- + deltat * veloc_inner_core(:,i)
- veloc_inner_core(:,i) = veloc_inner_core(:,i) + BETA_LDDRK(istage) * veloc_inner_core_lddrk(:,i)
-
- displ_inner_core(:,i) = displ_inner_core(:,i) + BETA_LDDRK(istage) * displ_inner_core_lddrk(:,i)
- enddo
-
end subroutine update_veloc_elastic_lddrk
!
@@ -195,35 +223,84 @@
use specfem_par_innercore
implicit none
+ ! local parameters
+ real(kind=CUSTOM_REAL) :: alpha,beta
+ ! current runge-kutta coefficients
+ alpha = ALPHA_LDDRK(istage)
+ beta = BETA_LDDRK(istage)
+
+ ! backward/reconstructed wavefields
+ ! crust/mantle
+ call update_elastic_lddrk(NGLOB_CRUST_MANTLE_ADJOINT,b_displ_crust_mantle,b_veloc_crust_mantle,b_accel_crust_mantle, &
+ b_displ_crust_mantle_lddrk,b_veloc_crust_mantle_lddrk, &
+ b_deltat,alpha,beta)
+
+ ! inner core
+ call update_elastic_lddrk(NGLOB_INNER_CORE_ADJOINT,b_displ_inner_core,b_veloc_inner_core,b_accel_inner_core, &
+ b_displ_inner_core_lddrk,b_veloc_inner_core_lddrk, &
+ b_deltat,alpha,beta)
+
+ end subroutine update_veloc_elastic_lddrk_backward
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine update_elastic_lddrk(NGLOB,displ,veloc,accel, &
+ displ_lddrk,veloc_lddrk, &
+ deltat,alpha,beta)
+
+
+ use constants_solver,only: CUSTOM_REAL,NDIM,FORCE_VECTORIZATION_VAL
+
+ implicit none
+
+ integer,intent(in) :: NGLOB
+
+ ! wavefields
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB),intent(inout) :: displ,veloc,accel
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB),intent(inout) :: displ_lddrk,veloc_lddrk
+
+ real(kind=CUSTOM_REAL),intent(in) :: deltat
+ ! runge-kutta coefficients
+ real(kind=CUSTOM_REAL),intent(in) :: alpha,beta
+
! local parameters
integer :: i
- ! crust/mantle
- do i=1,NGLOB_CRUST_MANTLE
- b_veloc_crust_mantle_lddrk(:,i) = ALPHA_LDDRK(istage) * b_veloc_crust_mantle_lddrk(:,i) &
- + b_deltat * b_accel_crust_mantle(:,i)
+ ! low-memory runge-kutta scheme
- b_displ_crust_mantle_lddrk(:,i) = ALPHA_LDDRK(istage) * b_displ_crust_mantle_lddrk(:,i) &
- + b_deltat * b_veloc_crust_mantle(:,i)
+ if(FORCE_VECTORIZATION_VAL) then
- b_veloc_crust_mantle(:,i) = b_veloc_crust_mantle(:,i) + BETA_LDDRK(istage) * b_veloc_crust_mantle_lddrk(:,i)
+ ! note: splitting the do-loops seems to be slightly more effective
- b_displ_crust_mantle(:,i) = b_displ_crust_mantle(:,i) + BETA_LDDRK(istage) * b_displ_crust_mantle_lddrk(:,i)
- enddo
+ ! low-memory runge-kutta: intermediate storage wavefields
+ do i=1,NGLOB * NDIM
+ veloc_lddrk(i,1) = alpha * veloc_lddrk(i,1) + deltat * accel(i,1)
+ displ_lddrk(i,1) = alpha * displ_lddrk(i,1) + deltat * veloc(i,1)
+ enddo
+ ! updates wavefields
+ do i=1,NGLOB * NDIM
+ veloc(i,1) = veloc(i,1) + beta * veloc_lddrk(i,1)
+ displ(i,1) = displ(i,1) + beta * displ_lddrk(i,1)
+ enddo
- ! inner core
- do i=1,NGLOB_INNER_CORE
- b_veloc_inner_core_lddrk(:,i) = ALPHA_LDDRK(istage) * b_veloc_inner_core_lddrk(:,i) &
- + b_deltat * b_accel_inner_core(:,i)
+ else
- b_displ_inner_core_lddrk(:,i) = ALPHA_LDDRK(istage) * b_displ_inner_core_lddrk(:,i) &
- + b_deltat * b_veloc_inner_core(:,i)
+ ! non-vectorized loops
- b_veloc_inner_core(:,i) = b_veloc_inner_core(:,i) + BETA_LDDRK(istage) * b_veloc_inner_core_lddrk(:,i)
+ do i=1,NGLOB
+ ! low-memory runge-kutta: intermediate storage wavefields
+ veloc_lddrk(:,i) = alpha * veloc_lddrk(:,i) + deltat * accel(:,i)
+ displ_lddrk(:,i) = alpha * displ_lddrk(:,i) + deltat * veloc(:,i)
+ ! updates wavefields
+ veloc(:,i) = veloc(:,i) + beta * veloc_lddrk(:,i)
+ displ(:,i) = displ(:,i) + beta * displ_lddrk(:,i)
+ enddo
- b_displ_inner_core(:,i) = b_displ_inner_core(:,i) + BETA_LDDRK(istage) * b_displ_inner_core_lddrk(:,i)
- enddo
+ endif
- end subroutine update_veloc_elastic_lddrk_backward
+ end subroutine update_elastic_lddrk
+
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/update_displacement_Newmark.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/update_displacement_Newmark.f90 2013-11-01 21:27:52 UTC (rev 22976)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/update_displacement_Newmark.f90 2013-11-04 08:16:42 UTC (rev 22977)
@@ -371,15 +371,15 @@
implicit none
- integer :: NGLOB_CM,NGLOB_IC
+ integer,intent(in) :: NGLOB_CM,NGLOB_IC
! acceleration & velocity
! crust/mantle region
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CM) :: veloc_crust_mantle,accel_crust_mantle
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CM),intent(inout) :: veloc_crust_mantle,accel_crust_mantle
! inner core
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_IC) :: veloc_inner_core,accel_inner_core
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_IC),intent(inout) :: veloc_inner_core,accel_inner_core
- real(kind=CUSTOM_REAL) :: deltatover2
+ real(kind=CUSTOM_REAL),intent(in) :: deltatover2
! local parameters
integer :: i
@@ -393,30 +393,26 @@
! - inner core region
! needs both, acceleration update & velocity corrector terms
- ! crust/mantle
if(FORCE_VECTORIZATION_VAL) then
+ ! crust/mantle
do i=1,NGLOB_CM * NDIM
veloc_crust_mantle(i,1) = veloc_crust_mantle(i,1) + deltatover2*accel_crust_mantle(i,1)
enddo
- else
-
- do i=1,NGLOB_CM
- veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) + deltatover2*accel_crust_mantle(:,i)
- enddo
-
- endif
-
- ! inner core
- if(FORCE_VECTORIZATION_VAL) then
-
+ ! inner core
do i=1,NGLOB_IC * NDIM
veloc_inner_core(i,1) = veloc_inner_core(i,1) + deltatover2*accel_inner_core(i,1)
enddo
else
+ ! crust/mantle
+ do i=1,NGLOB_CM
+ veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) + deltatover2*accel_crust_mantle(:,i)
+ enddo
+
+ ! inner core
do i=1,NGLOB_IC
veloc_inner_core(:,i) = veloc_inner_core(:,i) + deltatover2*accel_inner_core(:,i)
enddo
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/utils/attenuation/attenuation_output.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/utils/attenuation/attenuation_output.f90 2013-11-01 21:27:52 UTC (rev 22976)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/utils/attenuation/attenuation_output.f90 2013-11-04 08:16:42 UTC (rev 22977)
@@ -21,7 +21,7 @@
integer myrank, vnspec, process, iregion
character(len=150) prname, LOCAL_PATH
double precision, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_AC) :: one_minus_sum_beta, scale_factor
- double precision, dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_AC) :: factor_common
+ double precision, dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_CRUST_MANTLE_AC) :: factor_common
double precision, dimension(N_SLS) :: tau_s
double precision T_c_source
@@ -56,9 +56,9 @@
do k = 1, NGLLZ
do j = 1, NGLLY
do i = 1, NGLLX
- write(*,*)' tau_mu(1) = ', factor_common(1,i,j,k,ispec)
- write(*,*)' tau_mu(2) = ', factor_common(2,i,j,k,ispec)
- write(*,*)' tau_mu(3) = ', factor_common(3,i,j,k,ispec)
+ write(*,*)' tau_mu(1) = ', factor_common(i,j,k,1,ispec)
+ write(*,*)' tau_mu(2) = ', factor_common(i,j,k,2,ispec)
+ write(*,*)' tau_mu(3) = ', factor_common(i,j,k,3,ispec)
write(*,*)' Qmu = ', scale_factor(i,j,k,ispec)
write(*,*)
enddo
More information about the CIG-COMMITS
mailing list