[cig-commits] r22613 - seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D
dkomati1 at geodynamics.org
dkomati1 at geodynamics.org
Mon Jul 15 06:13:18 PDT 2013
Author: dkomati1
Date: 2013-07-15 06:13:18 -0700 (Mon, 15 Jul 2013)
New Revision: 22613
Modified:
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/part1_classical.F90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/part1_undo_att.F90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/part2_classical.F90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/part2_undo_att.F90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/specfem3D.F90
Log:
done vectorizing part 1; I now get some warnings from Intel ifort because include files are apparently not processed by the FPP preprocessor even if they are named *.F90
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/part1_classical.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/part1_classical.F90 2013-07-15 12:38:46 UTC (rev 22612)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/part1_classical.F90 2013-07-15 13:13:18 UTC (rev 22613)
@@ -7,6 +7,15 @@
! Newmark time scheme update
! mantle
+#ifdef FORCE_VECTORIZATION
+ do i=1,NGLOB_CRUST_MANTLE*NDIM
+ displ_crust_mantle(i,1) = displ_crust_mantle(i,1) &
+ + deltat*veloc_crust_mantle(i,1) + deltatsqover2*accel_crust_mantle(i,1)
+ veloc_crust_mantle(i,1) = veloc_crust_mantle(i,1) &
+ + deltatover2*accel_crust_mantle(i,1)
+ accel_crust_mantle(i,1) = 0._CUSTOM_REAL
+ enddo
+#else
do i=1,NGLOB_CRUST_MANTLE
displ_crust_mantle(:,i) = displ_crust_mantle(:,i) &
+ deltat*veloc_crust_mantle(:,i) + deltatsqover2*accel_crust_mantle(:,i)
@@ -14,6 +23,8 @@
+ deltatover2*accel_crust_mantle(:,i)
accel_crust_mantle(:,i) = 0._CUSTOM_REAL
enddo
+#endif
+
! outer core
do i=1,NGLOB_OUTER_CORE
displ_outer_core(i) = displ_outer_core(i) &
@@ -22,7 +33,17 @@
+ deltatover2*accel_outer_core(i)
accel_outer_core(i) = 0._CUSTOM_REAL
enddo
+
! inner core
+#ifdef FORCE_VECTORIZATION
+ do i=1,NGLOB_INNER_CORE*NDIM
+ displ_inner_core(i,1) = displ_inner_core(i,1) &
+ + deltat*veloc_inner_core(i,1) + deltatsqover2*accel_inner_core(i,1)
+ veloc_inner_core(i,1) = veloc_inner_core(i,1) &
+ + deltatover2*accel_inner_core(i,1)
+ accel_inner_core(i,1) = 0._CUSTOM_REAL
+ enddo
+#else
do i=1,NGLOB_INNER_CORE
displ_inner_core(:,i) = displ_inner_core(:,i) &
+ deltat*veloc_inner_core(:,i) + deltatsqover2*accel_inner_core(:,i)
@@ -30,6 +51,7 @@
+ deltatover2*accel_inner_core(:,i)
accel_inner_core(:,i) = 0._CUSTOM_REAL
enddo
+#endif
! integral of strain for adjoint movie volume
if(MOVIE_VOLUME .and. (MOVIE_VOLUME_TYPE == 2 .or. MOVIE_VOLUME_TYPE == 3) ) then
@@ -267,7 +289,7 @@
veloc_outer_core(i) = veloc_outer_core(i) + deltatover2*accel_outer_core(i)
enddo
-! ------------------- new non blocking implementation -------------------
+! ------------------- non blocking implementation -------------------
! ****************************************************
! big loop over all spectral elements in the solid
@@ -438,7 +460,6 @@
! Stacey
if(NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS) then
-
call compute_stacey_crust_mantle_forward(ichunk, &
it,SAVE_FORWARD,ibool_crust_mantle, &
veloc_crust_mantle,accel_crust_mantle, &
@@ -767,7 +788,6 @@
endif ! end of assembling forces with the central cube
if(NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS) then
-
do i=1,NGLOB_CRUST_MANTLE
accel_crust_mantle(1,i) = accel_crust_mantle(1,i)*rmassx_crust_mantle(i) &
+ two_omega_earth*veloc_crust_mantle(2,i)
@@ -775,9 +795,7 @@
- two_omega_earth*veloc_crust_mantle(1,i)
accel_crust_mantle(3,i) = accel_crust_mantle(3,i)*rmassz_crust_mantle(i)
enddo
-
else
-
do i=1,NGLOB_CRUST_MANTLE
accel_crust_mantle(1,i) = accel_crust_mantle(1,i)*rmassz_crust_mantle(i) &
+ two_omega_earth*veloc_crust_mantle(2,i)
@@ -785,10 +803,9 @@
- two_omega_earth*veloc_crust_mantle(1,i)
accel_crust_mantle(3,i) = accel_crust_mantle(3,i)*rmassz_crust_mantle(i)
enddo
-
endif
-! ------------------- new non blocking implementation -------------------
+! ------------------- non blocking implementation -------------------
! couples ocean with crust mantle
if(OCEANS_VAL) then
@@ -811,9 +828,16 @@
! Newmark time scheme - corrector for elastic parts
! mantle
+#ifdef FORCE_VECTORIZATION
+ do i=1,NGLOB_CRUST_MANTLE*NDIM
+ veloc_crust_mantle(i,1) = veloc_crust_mantle(i,1) + deltatover2*accel_crust_mantle(i,1)
+ enddo
+#else
do i=1,NGLOB_CRUST_MANTLE
veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) + deltatover2*accel_crust_mantle(:,i)
enddo
+#endif
+
! inner core
do i=1,NGLOB_INNER_CORE
accel_inner_core(1,i) = accel_inner_core(1,i)*rmass_inner_core(i) &
@@ -821,9 +845,17 @@
accel_inner_core(2,i) = accel_inner_core(2,i)*rmass_inner_core(i) &
- two_omega_earth*veloc_inner_core(1,i)
accel_inner_core(3,i) = accel_inner_core(3,i)*rmass_inner_core(i)
+ enddo
+#ifdef FORCE_VECTORIZATION
+ do i=1,NGLOB_INNER_CORE*NDIM
+ veloc_inner_core(i,1) = veloc_inner_core(i,1) + deltatover2*accel_inner_core(i,1)
+ enddo
+#else
+ do i=1,NGLOB_INNER_CORE
veloc_inner_core(:,i) = veloc_inner_core(:,i) + deltatover2*accel_inner_core(:,i)
enddo
+#endif
! write the seismograms with time shift
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/part1_undo_att.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/part1_undo_att.F90 2013-07-15 12:38:46 UTC (rev 22612)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/part1_undo_att.F90 2013-07-15 13:13:18 UTC (rev 22613)
@@ -8,15 +8,28 @@
do istage = 1, NSTAGE_TIME_SCHEME ! begin loop of istage
+
if(USE_LDDRK)then
+
! mantle
accel_crust_mantle(:,:) = 0._CUSTOM_REAL
! outer core
accel_outer_core(:) = 0._CUSTOM_REAL
! inner core
accel_inner_core(:,:) = 0._CUSTOM_REAL
+
else
+
! mantle
+#ifdef FORCE_VECTORIZATION
+ do i=1,NGLOB_CRUST_MANTLE*NDIM
+ displ_crust_mantle(i,1) = displ_crust_mantle(i,1) &
+ + deltat*veloc_crust_mantle(i,1) + deltatsqover2*accel_crust_mantle(i,1)
+ veloc_crust_mantle(i,1) = veloc_crust_mantle(i,1) &
+ + deltatover2*accel_crust_mantle(i,1)
+ accel_crust_mantle(i,1) = 0._CUSTOM_REAL
+ enddo
+#else
do i=1,NGLOB_CRUST_MANTLE
displ_crust_mantle(:,i) = displ_crust_mantle(:,i) &
+ deltat*veloc_crust_mantle(:,i) + deltatsqover2*accel_crust_mantle(:,i)
@@ -24,6 +37,8 @@
+ deltatover2*accel_crust_mantle(:,i)
accel_crust_mantle(:,i) = 0._CUSTOM_REAL
enddo
+#endif
+
! outer core
do i=1,NGLOB_OUTER_CORE
displ_outer_core(i) = displ_outer_core(i) &
@@ -32,7 +47,17 @@
+ deltatover2*accel_outer_core(i)
accel_outer_core(i) = 0._CUSTOM_REAL
enddo
+
! inner core
+#ifdef FORCE_VECTORIZATION
+ do i=1,NGLOB_INNER_CORE*NDIM
+ displ_inner_core(i,1) = displ_inner_core(i,1) &
+ + deltat*veloc_inner_core(i,1) + deltatsqover2*accel_inner_core(i,1)
+ veloc_inner_core(i,1) = veloc_inner_core(i,1) &
+ + deltatover2*accel_inner_core(i,1)
+ accel_inner_core(i,1) = 0._CUSTOM_REAL
+ enddo
+#else
do i=1,NGLOB_INNER_CORE
displ_inner_core(:,i) = displ_inner_core(:,i) &
+ deltat*veloc_inner_core(:,i) + deltatsqover2*accel_inner_core(:,i)
@@ -40,6 +65,8 @@
+ deltatover2*accel_inner_core(:,i)
accel_inner_core(:,i) = 0._CUSTOM_REAL
enddo
+#endif
+
endif
if(istage == 1)then
@@ -297,7 +324,8 @@
enddo
endif
-! ------------------- new non blocking implementation -------------------
+! ------------------- non blocking implementation -------------------
+
! ****************************************************
! big loop over all spectral elements in the solid
! ****************************************************
@@ -467,7 +495,6 @@
! Stacey
if(NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS) then
-
call compute_stacey_crust_mantle_forward(ichunk, &
it,SAVE_FORWARD,ibool_crust_mantle, &
veloc_crust_mantle,accel_crust_mantle, &
@@ -792,8 +819,7 @@
endif ! end of assembling forces with the central cube
if((NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS .or. &
- (ROTATION_VAL .and. EXACT_MASS_MATRIX_FOR_ROTATION)) .and. (.not. USE_LDDRK)) then
-
+ (ROTATION_VAL .and. EXACT_MASS_MATRIX_FOR_ROTATION)) .and. .not. USE_LDDRK) then
do i=1,NGLOB_CRUST_MANTLE
accel_crust_mantle(1,i) = accel_crust_mantle(1,i)*rmassx_crust_mantle(i) &
+ two_omega_earth*veloc_crust_mantle(2,i)
@@ -801,9 +827,7 @@
- two_omega_earth*veloc_crust_mantle(1,i)
accel_crust_mantle(3,i) = accel_crust_mantle(3,i)*rmassz_crust_mantle(i)
enddo
-
else
-
do i=1,NGLOB_CRUST_MANTLE
accel_crust_mantle(1,i) = accel_crust_mantle(1,i)*rmassz_crust_mantle(i) &
+ two_omega_earth*veloc_crust_mantle(2,i)
@@ -811,10 +835,10 @@
- two_omega_earth*veloc_crust_mantle(1,i)
accel_crust_mantle(3,i) = accel_crust_mantle(3,i)*rmassz_crust_mantle(i)
enddo
-
endif
-! ------------------- new non blocking implementation -------------------
+! ------------------- non blocking implementation -------------------
+
! couples ocean with crust mantle
if(OCEANS_VAL) then
call compute_coupling_ocean(accel_crust_mantle, &
@@ -836,7 +860,7 @@
! Newmark time scheme - corrector for elastic parts
! mantle
- if(USE_LDDRK)then
+ if(USE_LDDRK) then
do i=1,NGLOB_CRUST_MANTLE
veloc_crust_mantle_lddrk(:,i) = ALPHA_LDDRK(istage) * veloc_crust_mantle_lddrk(:,i) &
+ deltat * accel_crust_mantle(:,i)
@@ -846,39 +870,59 @@
displ_crust_mantle(:,i) = displ_crust_mantle(:,i) + BETA_LDDRK(istage) * displ_crust_mantle_lddrk(:,i)
enddo
else
+#ifdef FORCE_VECTORIZATION
+ do i=1,NGLOB_CRUST_MANTLE*NDIM
+ veloc_crust_mantle(i,1) = veloc_crust_mantle(i,1) + deltatover2*accel_crust_mantle(i,1)
+ enddo
+#else
do i=1,NGLOB_CRUST_MANTLE
veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) + deltatover2*accel_crust_mantle(:,i)
enddo
+#endif
endif
+
! inner core
- do i=1,NGLOB_INNER_CORE
- if(ROTATION_VAL .and. EXACT_MASS_MATRIX_FOR_ROTATION .and. .not. USE_LDDRK)then
+ if(ROTATION_VAL .and. EXACT_MASS_MATRIX_FOR_ROTATION .and. .not. USE_LDDRK) then
+ do i=1,NGLOB_INNER_CORE
accel_inner_core(1,i) = accel_inner_core(1,i)*rmassx_inner_core(i) &
+ two_omega_earth*veloc_inner_core(2,i)
accel_inner_core(2,i) = accel_inner_core(2,i)*rmassy_inner_core(i) &
- two_omega_earth*veloc_inner_core(1,i)
accel_inner_core(3,i) = accel_inner_core(3,i)*rmass_inner_core(i)
- else
+ enddo
+ else
+ do i=1,NGLOB_INNER_CORE
accel_inner_core(1,i) = accel_inner_core(1,i)*rmass_inner_core(i) &
+ two_omega_earth*veloc_inner_core(2,i)
accel_inner_core(2,i) = accel_inner_core(2,i)*rmass_inner_core(i) &
- two_omega_earth*veloc_inner_core(1,i)
accel_inner_core(3,i) = accel_inner_core(3,i)*rmass_inner_core(i)
- endif
+ enddo
+ endif
- if(USE_LDDRK)then
+ if(USE_LDDRK)then
+ do i=1,NGLOB_INNER_CORE
veloc_inner_core_lddrk(:,i) = ALPHA_LDDRK(istage) * veloc_inner_core_lddrk(:,i) &
+ deltat * accel_inner_core(:,i)
displ_inner_core_lddrk(:,i) = ALPHA_LDDRK(istage) * displ_inner_core_lddrk(:,i) &
+ deltat * veloc_inner_core(:,i)
veloc_inner_core(:,i) = veloc_inner_core(:,i) + BETA_LDDRK(istage) * veloc_inner_core_lddrk(:,i)
displ_inner_core(:,i) = displ_inner_core(:,i) + BETA_LDDRK(istage) * displ_inner_core_lddrk(:,i)
- else
+ enddo
+ else
+#ifdef FORCE_VECTORIZATION
+ do i=1,NGLOB_INNER_CORE*NDIM
+ veloc_inner_core(i,1) = veloc_inner_core(i,1) + deltatover2*accel_inner_core(i,1)
+ enddo
+#else
+ do i=1,NGLOB_INNER_CORE
veloc_inner_core(:,i) = veloc_inner_core(:,i) + deltatover2*accel_inner_core(:,i)
- endif
- enddo
- enddo ! end loop of istage
+ enddo
+#endif
+ endif
+ enddo ! end of very big external loop on istage for all the stages of the LDDRK time scheme (only one stage if Newmark)
+
! write the seismograms with time shift
! store the seismograms only if there is at least one receiver located in this slice
@@ -1059,4 +1103,3 @@
endif
endif ! MOVIE_VOLUME
-
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/part2_classical.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/part2_classical.F90 2013-07-15 12:38:46 UTC (rev 22612)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/part2_classical.F90 2013-07-15 13:13:18 UTC (rev 22613)
@@ -179,7 +179,7 @@
! assemble all the contributions between slices using MPI
-! ------------------- new non blocking implementation -------------------
+! ------------------- non blocking implementation -------------------
if (SIMULATION_TYPE == 3) then
@@ -265,7 +265,7 @@
NGLOB2DMAX_XY_OC_VAL,NCHUNKS_VAL,b_iphase)
enddo
-! ------------------- new non blocking implementation -------------------
+! ------------------- non blocking implementation -------------------
! Newmark time scheme - corrector for fluid parts
do i=1,NGLOB_OUTER_CORE
@@ -534,7 +534,7 @@
! crust/mantle and inner core handled in the same call
! in order to reduce the number of MPI messages by 2
-! ------------------- new non blocking implementation -------------------
+! ------------------- non blocking implementation -------------------
if (SIMULATION_TYPE == 3) then
@@ -759,10 +759,9 @@
enddo
endif ! end of assembling forces with the central cube
-! ------------------- new non blocking implementation -------------------
+! ------------------- non blocking implementation -------------------
if(NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS) then
-
do i=1,NGLOB_CRUST_MANTLE
b_accel_crust_mantle(1,i) = b_accel_crust_mantle(1,i)*rmassx_crust_mantle(i) &
+ b_two_omega_earth*b_veloc_crust_mantle(2,i)
@@ -770,9 +769,7 @@
- b_two_omega_earth*b_veloc_crust_mantle(1,i)
b_accel_crust_mantle(3,i) = b_accel_crust_mantle(3,i)*rmassz_crust_mantle(i)
enddo
-
else
-
do i=1,NGLOB_CRUST_MANTLE
b_accel_crust_mantle(1,i) = b_accel_crust_mantle(1,i)*rmassz_crust_mantle(i) &
+ b_two_omega_earth*b_veloc_crust_mantle(2,i)
@@ -780,7 +777,6 @@
- b_two_omega_earth*b_veloc_crust_mantle(1,i)
b_accel_crust_mantle(3,i) = b_accel_crust_mantle(3,i)*rmassz_crust_mantle(i)
enddo
-
endif
endif ! SIMULATION_TYPE == 3
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/part2_undo_att.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/part2_undo_att.F90 2013-07-15 12:38:46 UTC (rev 22612)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/part2_undo_att.F90 2013-07-15 13:13:18 UTC (rev 22613)
@@ -210,7 +210,7 @@
! assemble all the contributions between slices using MPI
-! ------------------- new non blocking implementation -------------------
+! ------------------- non blocking implementation -------------------
if (SIMULATION_TYPE == 3) then
@@ -296,7 +296,7 @@
NGLOB2DMAX_XY_OC_VAL,NCHUNKS_VAL,b_iphase)
enddo
-! ------------------- new non blocking implementation -------------------
+! ------------------- non blocking implementation -------------------
! Newmark time scheme - corrector for fluid parts
do i=1,NGLOB_OUTER_CORE
@@ -599,7 +599,7 @@
! crust/mantle and inner core handled in the same call
! in order to reduce the number of MPI messages by 2
-! ------------------- new non blocking implementation -------------------
+! ------------------- non blocking implementation -------------------
if (SIMULATION_TYPE == 3) then
@@ -824,10 +824,9 @@
enddo
endif ! end of assembling forces with the central cube
-! ------------------- new non blocking implementation -------------------
+! ------------------- non blocking implementation -------------------
if((NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS) .and. .not. USE_LDDRK) then
-
if(ROTATION_VAL .and. EXACT_MASS_MATRIX_FOR_ROTATION)then
do i=1,NGLOB_CRUST_MANTLE
b_accel_crust_mantle(1,i) = b_accel_crust_mantle(1,i)*b_rmassx_crust_mantle(i) &
@@ -845,7 +844,6 @@
b_accel_crust_mantle(3,i) = b_accel_crust_mantle(3,i)*rmassz_crust_mantle(i)
enddo
endif
-
else
if(ROTATION_VAL .and. EXACT_MASS_MATRIX_FOR_ROTATION .and. .not. USE_LDDRK)then
do i=1,NGLOB_CRUST_MANTLE
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/specfem3D.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/specfem3D.F90 2013-07-15 12:38:46 UTC (rev 22612)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/specfem3D.F90 2013-07-15 13:13:18 UTC (rev 22613)
@@ -2495,19 +2495,18 @@
!! DK DK this first part handles the cases SIMULATION_TYPE == 1 and SIMULATION_TYPE == 2
!! DK DK it also handles the cases NOISE_TOMOGRAPHY == 1 and NOISE_TOMOGRAPHY == 2
!! DK DK
- if(USE_LDDRK .or. EXACT_MASS_MATRIX_FOR_ROTATION)then
- include "part1_undo_att.f90"
+ if(USE_LDDRK .or. EXACT_MASS_MATRIX_FOR_ROTATION) then
+ include "part1_undo_att.F90"
else
- include "part1_classical.f90"
+ include "part1_classical.F90"
endif
!! DK DK
!! DK DK this first part handles the case SIMULATION_TYPE == 3
!! DK DK it also handles the case NOISE_TOMOGRAPHY == 3
!! DK DK
- include "part2_classical.f90"
+ include "part2_classical.F90"
-!! DK DK empty file for now
include "part3_kernel_computation.f90"
!
@@ -2542,7 +2541,7 @@
seismo_current = seismo_current + 1
- include "part1_undo_att.f90"
+ include "part1_undo_att.F90"
enddo
enddo
@@ -2560,7 +2559,7 @@
seismo_current = seismo_current + 1
- include "part1_undo_att.f90"
+ include "part1_undo_att.F90"
enddo
enddo
@@ -2623,8 +2622,9 @@
it = it + 1
seismo_current = seismo_current + 1
- include "part2_undo_att.f90"
+ include "part2_undo_att.F90"
+
b_displ_crust_mantle_store_buffer(:,:,it_of_this_subset) = b_displ_crust_mantle(:,:)
b_displ_outer_core_store_buffer(:,it_of_this_subset) = b_displ_outer_core(:)
b_accel_outer_core_store_buffer(:,it_of_this_subset) = b_accel_outer_core(:)
@@ -2681,7 +2681,7 @@
seismo_current = seismo_current + 1
- include "part1_undo_att.f90"
+ include "part1_undo_att.F90"
include "part3_kernel_computation.f90"
More information about the CIG-COMMITS
mailing list