[cig-commits] r21928 - seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D
dkomati1 at geodynamics.org
dkomati1 at geodynamics.org
Wed Apr 24 16:33:11 PDT 2013
Author: dkomati1
Date: 2013-04-24 16:33:11 -0700 (Wed, 24 Apr 2013)
New Revision: 21928
Modified:
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_element.F90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90
Log:
done suppressing _HANDOPT, which was unused; we know how to vectorize the code better, we will do that once it is restructured to undo attenuation
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_element.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_element.F90 2013-04-24 23:23:11 UTC (rev 21927)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_element.F90 2013-04-24 23:33:11 UTC (rev 21928)
@@ -25,21 +25,6 @@
!
!=====================================================================
-! preprocessing definition: #define _HANDOPT : turns hand-optimized code on
-! #undef _HANDOPT : turns hand-optimized code off
-! or compile with: -D_HANDOPT
-!#define _HANDOPT
-
-! BEWARE:
-! BEWARE: we have observed that using _HANDOPT in combination with -O3 or higher can lead to problems on some machines;
-! BEWARE: thus, be careful when using it. At the very least, run the same test simulation once with _HANDOPT and once without
-! BEWARE: and make sure that all the seismograms you get are the same down to roundoff noise.
-! BEWARE:
-
-! note: these hand optimizations should help compilers to pipeline the code and make better use of the cache;
-! depending on compilers, it can further decrease the computation time by ~ 30%.
-! the original routines are commented with "! way 1", the hand-optimized routines with "! way 2"
-
subroutine compute_element_iso(ispec, &
minus_gravity_table,density_table,minus_deriv_gravity_table, &
xstore,ystore,zstore, &
@@ -1286,82 +1271,20 @@
! local parameters
real(kind=CUSTOM_REAL) R_xx_val1,R_yy_val1
integer :: i_SLS
-#ifdef _HANDOPT
- real(kind=CUSTOM_REAL) R_xx_val2,R_yy_val2,R_xx_val3,R_yy_val3
- integer :: imodulo_N_SLS
- integer :: i_SLS1,i_SLS2
-#endif
-#ifdef _HANDOPT
-! way 2:
-! note: this should help compilers to pipeline the code and make better use of the cache;
-! depending on compilers, it can further decrease the computation time by ~ 30%.
-! by default, N_SLS = 3, therefore we take steps of 3
- imodulo_N_SLS = mod(N_SLS,3)
-
- if(imodulo_N_SLS >= 1) then
- do i_SLS = 1,imodulo_N_SLS
- R_xx_val1 = R_memory_loc(1,i_SLS)
- R_yy_val1 = R_memory_loc(2,i_SLS)
- sigma_xx = sigma_xx - R_xx_val1
- sigma_yy = sigma_yy - R_yy_val1
- sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1
- sigma_xy = sigma_xy - R_memory_loc(3,i_SLS)
- sigma_xz = sigma_xz - R_memory_loc(4,i_SLS)
- sigma_yz = sigma_yz - R_memory_loc(5,i_SLS)
- enddo
- endif
- if(N_SLS >= imodulo_N_SLS+1) then
- ! note: another possibility would be using a reduction example for this loop; was tested but it does not improve,
- ! probably since N_SLS == 3 is too small for a loop benefit
- do i_SLS = imodulo_N_SLS+1,N_SLS,3
- R_xx_val1 = R_memory_loc(1,i_SLS)
- R_yy_val1 = R_memory_loc(2,i_SLS)
- sigma_xx = sigma_xx - R_xx_val1
- sigma_yy = sigma_yy - R_yy_val1
- sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1
- sigma_xy = sigma_xy - R_memory_loc(3,i_SLS)
- sigma_xz = sigma_xz - R_memory_loc(4,i_SLS)
- sigma_yz = sigma_yz - R_memory_loc(5,i_SLS)
-
- i_SLS1=i_SLS+1
- R_xx_val2 = R_memory_loc(1,i_SLS1)
- R_yy_val2 = R_memory_loc(2,i_SLS1)
- sigma_xx = sigma_xx - R_xx_val2
- sigma_yy = sigma_yy - R_yy_val2
- sigma_zz = sigma_zz + R_xx_val2 + R_yy_val2
- sigma_xy = sigma_xy - R_memory_loc(3,i_SLS1)
- sigma_xz = sigma_xz - R_memory_loc(4,i_SLS1)
- sigma_yz = sigma_yz - R_memory_loc(5,i_SLS1)
-
- i_SLS2 =i_SLS+2
- R_xx_val3 = R_memory_loc(1,i_SLS2)
- R_yy_val3 = R_memory_loc(2,i_SLS2)
- sigma_xx = sigma_xx - R_xx_val3
- sigma_yy = sigma_yy - R_yy_val3
- sigma_zz = sigma_zz + R_xx_val3 + R_yy_val3
- sigma_xy = sigma_xy - R_memory_loc(3,i_SLS2)
- sigma_xz = sigma_xz - R_memory_loc(4,i_SLS2)
- sigma_yz = sigma_yz - R_memory_loc(5,i_SLS2)
- enddo
- endif
-#else
-! way 1:
do i_SLS = 1,N_SLS
- R_xx_val1 = R_memory_loc(1,i_SLS) ! R_memory(1,i_SLS,i,j,k,ispec)
- R_yy_val1 = R_memory_loc(2,i_SLS) ! R_memory(2,i_SLS,i,j,k,ispec)
+ R_xx_val1 = R_memory_loc(1,i_SLS)
+ R_yy_val1 = R_memory_loc(2,i_SLS)
sigma_xx = sigma_xx - R_xx_val1
sigma_yy = sigma_yy - R_yy_val1
sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1
- sigma_xy = sigma_xy - R_memory_loc(3,i_SLS) ! R_memory(3,i_SLS,i,j,k,ispec)
- sigma_xz = sigma_xz - R_memory_loc(4,i_SLS) ! R_memory(4,i_SLS,i,j,k,ispec)
- sigma_yz = sigma_yz - R_memory_loc(5,i_SLS) ! R_memory(5,i_SLS,i,j,k,ispec)
+ sigma_xy = sigma_xy - R_memory_loc(3,i_SLS)
+ sigma_xz = sigma_xz - R_memory_loc(4,i_SLS)
+ sigma_yz = sigma_yz - R_memory_loc(5,i_SLS)
enddo
-#endif
end subroutine compute_element_att_stress
-
!
!--------------------------------------------------------------------------------------------
!
@@ -1415,12 +1338,7 @@
real(kind=CUSTOM_REAL), dimension(NGLLX, NGLLY, NGLLZ) :: factor_common_c44_muv
integer :: i_SLS
-#ifdef _HANDOPT
- real(kind=CUSTOM_REAL) :: alphal,betal,gammal
- integer :: i,j,k
-#else
integer :: i_memory
-#endif
! use Runge-Kutta scheme to march in time
@@ -1428,14 +1346,7 @@
! IMPROVE we use mu_v here even if there is some anisotropy
! IMPROVE we should probably use an average value instead
-#ifdef _HANDOPT
-! way 2:
do i_SLS = 1,N_SLS
-
- alphal = alphaval(i_SLS)
- betal = betaval(i_SLS)
- gammal = gammaval(i_SLS)
-
! reformatted R_memory to handle large factor_common and reduced [alpha,beta,gamma]val
factor_common_c44_muv(:,:,:) = factor_common(i_SLS,:,:,:,ispec)
@@ -1445,38 +1356,13 @@
factor_common_c44_muv(:,:,:) = factor_common_c44_muv(:,:,:) * muvstore(:,:,:,ispec)
endif
- ! this helps to vectorize the inner most loop
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- R_memory(:,i_SLS,i,j,k,ispec) = alphal * R_memory(:,i_SLS,i,j,k,ispec) &
- + factor_common_c44_muv(i,j,k) &
- *( betal * epsilondev(:,i,j,k,ispec) + gammal * epsilondev_loc(:,i,j,k))
- enddo
- enddo
- enddo
- enddo ! i_SLS
-#else
-! way 1:
- do i_SLS = 1,N_SLS
- ! reformatted R_memory to handle large factor_common and reduced [alpha,beta,gamma]val
- factor_common_c44_muv(:,:,:) = factor_common(i_SLS,:,:,:,ispec)
-
- if(ANISOTROPIC_3D_MANTLE_VAL) then
- factor_common_c44_muv(:,:,:) = factor_common_c44_muv(:,:,:) * c44store(:,:,:,ispec)
- else
- factor_common_c44_muv(:,:,:) = factor_common_c44_muv(:,:,:) * muvstore(:,:,:,ispec)
- endif
-
do i_memory = 1,5
R_memory(i_memory,i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_memory(i_memory,i_SLS,:,:,:,ispec) &
+ factor_common_c44_muv(:,:,:) &
* (betaval(i_SLS) * epsilondev(i_memory,:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(i_memory,:,:,:))
enddo
enddo ! i_SLS
-#endif
-
end subroutine compute_element_att_memory_cr
!
@@ -1532,12 +1418,7 @@
integer :: i_SLS
-#ifdef _HANDOPT
- real(kind=CUSTOM_REAL) :: alphal,betal,gammal
- integer :: i,j,k
-#else
integer :: i_memory
-#endif
! use Runge-Kutta scheme to march in time
@@ -1545,43 +1426,14 @@
! IMPROVE we use mu_v here even if there is some anisotropy
! IMPROVE we should probably use an average value instead
-#ifdef _HANDOPT
-! way 2:
do i_SLS = 1,N_SLS
-
- alphal = alphaval(i_SLS)
- betal = betaval(i_SLS)
- gammal = gammaval(i_SLS)
-
- ! reformatted R_memory to handle large factor_common and reduced [alpha,beta,gamma]val
factor_common_use(:,:,:) = factor_common(i_SLS,:,:,:,ispec)
-
- factor_common_use(:,:,:) = factor_common_use(:,:,:) * muvstore(:,:,:,ispec)
-
- ! this helps to vectorize the inner most loop
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- R_memory(:,i_SLS,i,j,k,ispec) = alphal * R_memory(:,i_SLS,i,j,k,ispec) &
- + factor_common_use(i,j,k) &
- *( betal * epsilondev(:,i,j,k,ispec) + gammal * epsilondev_loc(:,i,j,k))
- enddo
- enddo
- enddo
-
- enddo ! i_SLS
-#else
-! way 1:
- do i_SLS = 1,N_SLS
- factor_common_use(:,:,:) = factor_common(i_SLS,:,:,:,ispec)
do i_memory = 1,5
R_memory(i_memory,i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_memory(i_memory,i_SLS,:,:,:,ispec) &
+ muvstore(:,:,:,ispec) * factor_common_use(:,:,:) * &
(betaval(i_SLS) * epsilondev(i_memory,:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(i_memory,:,:,:))
enddo
enddo
-#endif
end subroutine compute_element_att_memory_ic
-
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90 2013-04-24 23:23:11 UTC (rev 21927)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90 2013-04-24 23:33:11 UTC (rev 21928)
@@ -25,25 +25,9 @@
!
!=====================================================================
-! preprocessing definition: #define _HANDOPT : turns hand-optimized code on
-! #undef _HANDOPT : turns hand-optimized code off
-! or compile with: -D_HANDOPT
-!#define _HANDOPT
-
-! BEWARE:
-! BEWARE: we have observed that using _HANDOPT in combination with -O3 or higher can lead to problems on some machines;
-! BEWARE: thus, be careful when using it. At the very least, run the same test simulation once with _HANDOPT and once without
-! BEWARE: and make sure that all the seismograms you get are the same down to roundoff noise.
-! BEWARE:
-
-! note: these hand optimizations should help compilers to pipeline the code and make better use of the cache;
-! depending on compilers, it can further decrease the computation time by ~ 30%.
-! the original routines are commented with "! way 1", the hand-optimized routines with "! way 2"
-
subroutine compute_forces_crust_mantle_Dev(minus_gravity_table,density_table,minus_deriv_gravity_table, &
displ_crust_mantle,accel_crust_mantle,xstore,ystore,zstore, &
xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-!----------------------
is_on_a_slice_edge_crust_mantle,icall, &
accel_inner_core,ibool_inner_core,idoubling_inner_core, &
myrank,iproc_xi,iproc_eta,ichunk,addressing, &
@@ -60,7 +44,6 @@
nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_INNER_CORE,INCLUDE_CENTRAL_CUBE,iphase_CC, &
-!----------------------
hprime_xx,hprime_xxT, &
hprimewgll_xx,hprimewgll_xxT, &
wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
@@ -69,7 +52,6 @@
c23store,c24store,c25store,c26store,c33store,c34store,c35store, &
c36store,c44store,c45store,c46store,c55store,c56store,c66store, &
ibool,ispec_is_tiso, &
- ! --idoubling,
R_memory,epsilondev,epsilon_trace_over_3,one_minus_sum_beta, &
alphaval,betaval,gammaval,factor_common,vx,vy,vz,vnspec)
@@ -245,10 +227,6 @@
integer NSPEC2D_BOTTOM_INNER_CORE,iend,ispec_glob
integer, dimension(NSPEC2D_BOTTOM_INNER_CORE) :: ibelm_bottom_inner_core
-#ifdef _HANDOPT
- integer, dimension(5) :: iglobv5
-#endif
-
! ****************************************************
! big loop over all spectral elements in the solid
! ****************************************************
@@ -283,9 +261,6 @@
!$OMP tempx1,tempx2,tempx3, &
!$OMP newtempx1,newtempx2,newtempx3,newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3, &
!$OMP dummyx_loc,dummyy_loc,dummyz_loc,rho_s_H,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
-#ifdef _HANDOPT
-!$OMP iglobv5, &
-#endif
!$OMP iglob1,epsilondev_loc)
do ispec_glob = 1,NSPEC_CRUST_MANTLE,ELEMENTS_NONBLOCKING_CM_IC
@@ -344,43 +319,15 @@
! pages 386 and 389 and Figure 8.3.1
do k=1,NGLLZ
do j=1,NGLLY
-
-#ifdef _HANDOPT
-! way 2:
- ! since we know that NGLLX = 5, this should help pipelining
- iglobv5(:) = ibool(:,j,k,ispec)
-
- dummyx_loc(1,j,k) = displ_crust_mantle(1,iglobv5(1))
- dummyy_loc(1,j,k) = displ_crust_mantle(2,iglobv5(1))
- dummyz_loc(1,j,k) = displ_crust_mantle(3,iglobv5(1))
-
- dummyx_loc(2,j,k) = displ_crust_mantle(1,iglobv5(2))
- dummyy_loc(2,j,k) = displ_crust_mantle(2,iglobv5(2))
- dummyz_loc(2,j,k) = displ_crust_mantle(3,iglobv5(2))
-
- dummyx_loc(3,j,k) = displ_crust_mantle(1,iglobv5(3))
- dummyy_loc(3,j,k) = displ_crust_mantle(2,iglobv5(3))
- dummyz_loc(3,j,k) = displ_crust_mantle(3,iglobv5(3))
-
- dummyx_loc(4,j,k) = displ_crust_mantle(1,iglobv5(4))
- dummyy_loc(4,j,k) = displ_crust_mantle(2,iglobv5(4))
- dummyz_loc(4,j,k) = displ_crust_mantle(3,iglobv5(4))
-
- dummyx_loc(5,j,k) = displ_crust_mantle(1,iglobv5(5))
- dummyy_loc(5,j,k) = displ_crust_mantle(2,iglobv5(5))
- dummyz_loc(5,j,k) = displ_crust_mantle(3,iglobv5(5))
-
-#else
-! way 1:
do i=1,NGLLX
iglob1 = ibool(i,j,k,ispec)
dummyx_loc(i,j,k) = displ_crust_mantle(1,iglob1)
dummyy_loc(i,j,k) = displ_crust_mantle(2,iglob1)
dummyz_loc(i,j,k) = displ_crust_mantle(3,iglob1)
enddo
-#endif
enddo
enddo
+
do j=1,m2
do i=1,m1
C1_m1_m2_5points(i,j) = hprime_xx(i,1)*B1_m1_m2_5points(1,j) + &
@@ -402,6 +349,7 @@
hprime_xx(i,5)*B3_m1_m2_5points(5,j)
enddo
enddo
+
do j=1,m1
do i=1,m1
! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
@@ -426,6 +374,7 @@
enddo
enddo
enddo
+
do j=1,m1
do i=1,m2
C1_mxm_m2_m1_5points(i,j) = A1_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
@@ -520,6 +469,7 @@
hprimewgll_xxT(i,5)*C3_m1_m2_5points(5,j)
enddo
enddo
+
do i=1,m1
do j=1,m1
! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
@@ -544,6 +494,7 @@
enddo
enddo
enddo
+
do j=1,m1
do i=1,m2
E1_mxm_m2_m1_5points(i,j) = C1_mxm_m2_m1_5points(i,1)*hprimewgll_xx(1,j) + &
@@ -581,31 +532,16 @@
if(GRAVITY_VAL) sum_terms(:,i,j,k) = sum_terms(:,i,j,k) + rho_s_H(:,i,j,k)
enddo ! NGLLX
-
enddo ! NGLLY
enddo ! NGLLZ
! sum contributions from each element to the global mesh and add gravity terms
do k=1,NGLLZ
do j=1,NGLLY
-
-#ifdef _HANDOPT
-! way 2:
- iglobv5(:) = ibool(:,j,k,ispec)
-
- accel_crust_mantle(:,iglobv5(1)) = accel_crust_mantle(:,iglobv5(1)) + sum_terms(:,1,j,k)
- accel_crust_mantle(:,iglobv5(2)) = accel_crust_mantle(:,iglobv5(2)) + sum_terms(:,2,j,k)
- accel_crust_mantle(:,iglobv5(3)) = accel_crust_mantle(:,iglobv5(3)) + sum_terms(:,3,j,k)
- accel_crust_mantle(:,iglobv5(4)) = accel_crust_mantle(:,iglobv5(4)) + sum_terms(:,4,j,k)
- accel_crust_mantle(:,iglobv5(5)) = accel_crust_mantle(:,iglobv5(5)) + sum_terms(:,5,j,k)
-
-#else
-! way 1:
do i=1,NGLLX
iglob1 = ibool(i,j,k,ispec)
accel_crust_mantle(:,iglob1) = accel_crust_mantle(:,iglob1) + sum_terms(:,i,j,k)
enddo
-#endif
enddo
enddo
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90 2013-04-24 23:23:11 UTC (rev 21927)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90 2013-04-24 23:33:11 UTC (rev 21928)
@@ -25,25 +25,9 @@
!
!=====================================================================
-! preprocessing definition: #define _HANDOPT : turns hand-optimized code on
-! #undef _HANDOPT : turns hand-optimized code off
-! or compile with: -D_HANDOPT
-!#define _HANDOPT
-
-! BEWARE:
-! BEWARE: we have observed that using _HANDOPT in combination with -O3 or higher can lead to problems on some machines;
-! BEWARE: thus, be careful when using it. At the very least, run the same test simulation once with _HANDOPT and once without
-! BEWARE: and make sure that all the seismograms you get are the same down to roundoff noise.
-! BEWARE:
-
-! note: these hand optimizations should help compilers to pipeline the code and make better use of the cache;
-! depending on compilers, it can further decrease the computation time by ~ 30%.
-! the original routines are commented with "! way 1", the hand-optimized routines with "! way 2"
-
subroutine compute_forces_inner_core_Dev(minus_gravity_table,density_table,minus_deriv_gravity_table, &
displ_inner_core,accel_inner_core,xstore,ystore,zstore, &
xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-!----------------------
is_on_a_slice_edge_inner_core,icall, &
accel_crust_mantle,ibool_inner_core,idoubling_inner_core, &
myrank,iproc_xi,iproc_eta,ichunk,addressing, &
@@ -60,7 +44,6 @@
nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_INNER_CORE,INCLUDE_CENTRAL_CUBE,iphase_CC, &
-!----------------------
hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
kappavstore,muvstore,ibool,idoubling, &
@@ -254,10 +237,6 @@
integer NSPEC2D_BOTTOM_INNER_CORE
integer, dimension(NSPEC2D_BOTTOM_INNER_CORE) :: ibelm_bottom_inner_core
-#ifdef _HANDOPT
- integer, dimension(5) :: iglobv5
-#endif
-
! ****************************************************
! big loop over all spectral elements in the solid
! ****************************************************
@@ -309,40 +288,12 @@
do k=1,NGLLZ
do j=1,NGLLY
-#ifdef _HANDOPT
-! way 2:
- ! since we know that NGLLX = 5, this should help pipelining
- iglobv5(:) = ibool(:,j,k,ispec)
-
- dummyx_loc(1,j,k) = displ_inner_core(1,iglobv5(1))
- dummyy_loc(1,j,k) = displ_inner_core(2,iglobv5(1))
- dummyz_loc(1,j,k) = displ_inner_core(3,iglobv5(1))
-
- dummyx_loc(2,j,k) = displ_inner_core(1,iglobv5(2))
- dummyy_loc(2,j,k) = displ_inner_core(2,iglobv5(2))
- dummyz_loc(2,j,k) = displ_inner_core(3,iglobv5(2))
-
- dummyx_loc(3,j,k) = displ_inner_core(1,iglobv5(3))
- dummyy_loc(3,j,k) = displ_inner_core(2,iglobv5(3))
- dummyz_loc(3,j,k) = displ_inner_core(3,iglobv5(3))
-
- dummyx_loc(4,j,k) = displ_inner_core(1,iglobv5(4))
- dummyy_loc(4,j,k) = displ_inner_core(2,iglobv5(4))
- dummyz_loc(4,j,k) = displ_inner_core(3,iglobv5(4))
-
- dummyx_loc(5,j,k) = displ_inner_core(1,iglobv5(5))
- dummyy_loc(5,j,k) = displ_inner_core(2,iglobv5(5))
- dummyz_loc(5,j,k) = displ_inner_core(3,iglobv5(5))
-
-#else
-! way 1:
do i=1,NGLLX
iglob1 = ibool(i,j,k,ispec)
dummyx_loc(i,j,k) = displ_inner_core(1,iglob1)
dummyy_loc(i,j,k) = displ_inner_core(2,iglob1)
dummyz_loc(i,j,k) = displ_inner_core(3,iglob1)
enddo
-#endif
enddo
enddo
@@ -367,6 +318,7 @@
hprime_xx(i,5)*B3_m1_m2_5points(5,j)
enddo
enddo
+
do j=1,m1
do i=1,m1
! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
@@ -391,6 +343,7 @@
enddo
enddo
enddo
+
do j=1,m1
do i=1,m2
C1_mxm_m2_m1_5points(i,j) = A1_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
@@ -704,6 +657,7 @@
hprimewgll_xxT(i,5)*C3_m1_m2_5points(5,j)
enddo
enddo
+
do i=1,m1
do j=1,m1
! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
@@ -728,6 +682,7 @@
enddo
enddo
enddo
+
do j=1,m1
do i=1,m2
E1_mxm_m2_m1_5points(i,j) = C1_mxm_m2_m1_5points(i,1)*hprimewgll_xx(1,j) + &
@@ -771,23 +726,10 @@
! sum contributions from each element to the global mesh and add gravity terms
do k=1,NGLLZ
do j=1,NGLLY
-#ifdef _HANDOPT
-! way 2:
- iglobv5(:) = ibool(:,j,k,ispec)
-
- accel_inner_core(:,iglobv5(1)) = accel_inner_core(:,iglobv5(1)) + sum_terms(:,1,j,k)
- accel_inner_core(:,iglobv5(2)) = accel_inner_core(:,iglobv5(2)) + sum_terms(:,2,j,k)
- accel_inner_core(:,iglobv5(3)) = accel_inner_core(:,iglobv5(3)) + sum_terms(:,3,j,k)
- accel_inner_core(:,iglobv5(4)) = accel_inner_core(:,iglobv5(4)) + sum_terms(:,4,j,k)
- accel_inner_core(:,iglobv5(5)) = accel_inner_core(:,iglobv5(5)) + sum_terms(:,5,j,k)
-
-#else
-! way 1:
do i=1,NGLLX
iglob1 = ibool(i,j,k,ispec)
accel_inner_core(:,iglob1) = accel_inner_core(:,iglob1) + sum_terms(:,i,j,k)
enddo
-#endif
enddo
enddo
More information about the CIG-COMMITS
mailing list