[cig-commits] r22629 - seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D

Tue Jul 16 14:55:23 PDT 2013

Author: dkomati1
Date: 2013-07-16 14:55:23 -0700 (Tue, 16 Jul 2013)
New Revision: 22629

Modified:
   seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90
   seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90
   seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core_Dev.f90
Log:
cleaned compute_forces() routines in order to be able to vectorize them later


Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90
===================================================================

--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90	2013-07-16 17:18:13 UTC (rev 22628)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90	2013-07-16 21:55:23 UTC (rev 22629)
@@ -167,7 +167,7 @@
   ! for gravity
   real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: rho_s_H
 
-  integer :: ispec,i,j,k,iglob1
+  integer :: ispec,i,j,k,iglob
 
 ! this for non blocking MPI
   integer :: iphase,icall
@@ -278,35 +278,35 @@
 
     iend = min(ispec_glob+ELEMENTS_NONBLOCKING_CM_IC-1,NSPEC_CRUST_MANTLE)
 
-    do ispec=ispec_glob,iend
+    do ispec = ispec_glob,iend
 
 ! hide communications by computing the edges first
     if((icall == 2 .and. is_on_a_slice_edge_crust_mantle(ispec)) .or. &
        (icall == 1 .and. .not. is_on_a_slice_edge_crust_mantle(ispec))) cycle
 
-    ! subroutines adapted from Deville, Fischer and Mund, High-order methods
-    ! for incompressible fluid flow, Cambridge University Press (2002),
-    ! pages 386 and 389 and Figure 8.3.1
 #ifdef FORCE_VECTORIZATION
         do ijk=1,NGLLCUBE
-          iglob1 = ibool(ijk,1,1,ispec)
-          dummyx_loc(ijk,1,1) = displ_crust_mantle(1,iglob1)
-          dummyy_loc(ijk,1,1) = displ_crust_mantle(2,iglob1)
-          dummyz_loc(ijk,1,1) = displ_crust_mantle(3,iglob1)
+          iglob = ibool(ijk,1,1,ispec)
+          dummyx_loc(ijk,1,1) = displ_crust_mantle(1,iglob)
+          dummyy_loc(ijk,1,1) = displ_crust_mantle(2,iglob)
+          dummyz_loc(ijk,1,1) = displ_crust_mantle(3,iglob)
         enddo
 #else
     do k=1,NGLLZ
       do j=1,NGLLY
         do i=1,NGLLX
-          iglob1 = ibool(i,j,k,ispec)
-          dummyx_loc(i,j,k) = displ_crust_mantle(1,iglob1)
-          dummyy_loc(i,j,k) = displ_crust_mantle(2,iglob1)
-          dummyz_loc(i,j,k) = displ_crust_mantle(3,iglob1)
+          iglob = ibool(i,j,k,ispec)
+          dummyx_loc(i,j,k) = displ_crust_mantle(1,iglob)
+          dummyy_loc(i,j,k) = displ_crust_mantle(2,iglob)
+          dummyz_loc(i,j,k) = displ_crust_mantle(3,iglob)
         enddo
       enddo
     enddo
 #endif
 
+    ! subroutines adapted from Deville, Fischer and Mund, High-order methods
+    ! for incompressible fluid flow, Cambridge University Press (2002),
+    ! pages 386 and 389 and Figure 8.3.1
     do j=1,m2
        do i=1,m1
           C1_m1_m2_5points(i,j) = hprime_xx(i,1)*B1_m1_m2_5points(1,j) + &
@@ -498,30 +498,34 @@
       enddo
     enddo
 
+    ! sum contributions
     do k=1,NGLLZ
       do j=1,NGLLY
         fac1 = wgllwgll_yz(j,k)
         do i=1,NGLLX
           fac2 = wgllwgll_xz(i,k)
           fac3 = wgllwgll_xy(i,j)
-
-          ! sum contributions
           sum_terms(1,i,j,k) = - (fac1*newtempx1(i,j,k) + fac2*newtempx2(i,j,k) + fac3*newtempx3(i,j,k))
           sum_terms(2,i,j,k) = - (fac1*newtempy1(i,j,k) + fac2*newtempy2(i,j,k) + fac3*newtempy3(i,j,k))
           sum_terms(3,i,j,k) = - (fac1*newtempz1(i,j,k) + fac2*newtempz2(i,j,k) + fac3*newtempz3(i,j,k))
+        enddo
+      enddo
+    enddo
 
-          if(GRAVITY_VAL) sum_terms(:,i,j,k) = sum_terms(:,i,j,k) + rho_s_H(:,i,j,k)
+    ! add gravity terms
+    if(GRAVITY_VAL) then
+      sum_terms(:,:,:,:) = sum_terms(:,:,:,:) + rho_s_H(:,:,:,:)
+    endif
 
-        enddo ! NGLLX
-      enddo ! NGLLY
-    enddo ! NGLLZ
-
-    ! sum contributions from each element to the global mesh and add gravity terms
+    ! sum contributions from each element to the global mesh
     do k=1,NGLLZ
       do j=1,NGLLY
         do i=1,NGLLX
-          iglob1 = ibool(i,j,k,ispec)
-          accel_crust_mantle(:,iglob1) = accel_crust_mantle(:,iglob1) + sum_terms(:,i,j,k)
+          iglob = ibool(i,j,k,ispec)
+! do NOT use array syntax ":" for the three statements below otherwise most compilers will not be able to vectorize the outer loop
+          accel_crust_mantle(1,iglob) = accel_crust_mantle(1,iglob) + sum_terms(1,i,j,k)
+          accel_crust_mantle(2,iglob) = accel_crust_mantle(2,iglob) + sum_terms(2,i,j,k)
+          accel_crust_mantle(3,iglob) = accel_crust_mantle(3,iglob) + sum_terms(3,i,j,k)
         enddo
       enddo
     enddo
@@ -564,10 +568,9 @@
         epsilondev(5,:,:,:,ispec) = epsilondev_loc(5,:,:,:)
     endif
 
-! end ispec loop
-   enddo
+   enddo ! end of ispec loop
 
-  enddo   ! spectral element loop NSPEC_CRUST_MANTLE
+  enddo ! of spectral element loop NSPEC_CRUST_MANTLE
 
   end subroutine compute_forces_crust_mantle_Dev
 

Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90	2013-07-16 17:18:13 UTC (rev 22628)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90	2013-07-16 21:55:23 UTC (rev 22629)
@@ -178,7 +178,7 @@
   integer :: int_radius
   integer :: ispec
   integer :: i,j,k
-  integer :: iglob1
+  integer :: iglob
 
 ! this for non blocking MPI
   integer :: iphase,icall
@@ -301,29 +301,29 @@
 
       endif
 
-      ! subroutines adapted from Deville, Fischer and Mund, High-order methods
-      ! for incompressible fluid flow, Cambridge University Press (2002),
-      ! pages 386 and 389 and Figure 8.3.1
 #ifdef FORCE_VECTORIZATION
         do ijk=1,NGLLCUBE
-            iglob1 = ibool(ijk,1,1,ispec)
-            dummyx_loc(ijk,1,1) = displ_inner_core(1,iglob1)
-            dummyy_loc(ijk,1,1) = displ_inner_core(2,iglob1)
-            dummyz_loc(ijk,1,1) = displ_inner_core(3,iglob1)
+            iglob = ibool(ijk,1,1,ispec)
+            dummyx_loc(ijk,1,1) = displ_inner_core(1,iglob)
+            dummyy_loc(ijk,1,1) = displ_inner_core(2,iglob)
+            dummyz_loc(ijk,1,1) = displ_inner_core(3,iglob)
           enddo
 #else
       do k=1,NGLLZ
         do j=1,NGLLY
           do i=1,NGLLX
-            iglob1 = ibool(i,j,k,ispec)
-            dummyx_loc(i,j,k) = displ_inner_core(1,iglob1)
-            dummyy_loc(i,j,k) = displ_inner_core(2,iglob1)
-            dummyz_loc(i,j,k) = displ_inner_core(3,iglob1)
+            iglob = ibool(i,j,k,ispec)
+            dummyx_loc(i,j,k) = displ_inner_core(1,iglob)
+            dummyy_loc(i,j,k) = displ_inner_core(2,iglob)
+            dummyz_loc(i,j,k) = displ_inner_core(3,iglob)
           enddo
         enddo
       enddo
 #endif
 
+      ! subroutines adapted from Deville, Fischer and Mund, High-order methods
+      ! for incompressible fluid flow, Cambridge University Press (2002),
+      ! pages 386 and 389 and Figure 8.3.1
       do j=1,m2
          do i=1,m1
             C1_m1_m2_5points(i,j) = hprime_xx(i,1)*B1_m1_m2_5points(1,j) + &
@@ -541,10 +541,10 @@
 
               ! use mesh coordinates to get theta and phi
               ! x y and z contain r theta and phi
-              iglob1 = ibool(i,j,k,ispec)
-              radius = dble(xstore(iglob1))
-              theta = dble(ystore(iglob1))
-              phi = dble(zstore(iglob1))
+              iglob = ibool(i,j,k,ispec)
+              radius = dble(xstore(iglob))
+              theta = dble(ystore(iglob))
+              phi = dble(zstore(iglob))
 
               ! make sure radius is never zero even for points at center of cube
               ! because we later divide by radius
@@ -587,64 +587,33 @@
               Hyzl = cos_theta*minus_dg_plus_g_over_radius*sin_phi*sin_theta
 
               ! for locality principle, we set iglob again, in order to have it in the cache again
-              iglob1 = ibool(i,j,k,ispec)
+              iglob = ibool(i,j,k,ispec)
 
-              ! distinguish between single and double precision for reals
-              if(CUSTOM_REAL == SIZE_REAL) then
-                ! get displacement and multiply by density to compute G tensor
-                sx_l = rho * dble(displ_inner_core(1,iglob1))
-                sy_l = rho * dble(displ_inner_core(2,iglob1))
-                sz_l = rho * dble(displ_inner_core(3,iglob1))
+              ! get displacement and multiply by density to compute G tensor
+              sx_l = rho * displ_inner_core(1,iglob)
+              sy_l = rho * displ_inner_core(2,iglob)
+              sz_l = rho * displ_inner_core(3,iglob)
 
-                ! compute G tensor from s . g and add to sigma (not symmetric)
-                sigma_xx = sigma_xx + sngl(sy_l*gyl + sz_l*gzl)
-                sigma_yy = sigma_yy + sngl(sx_l*gxl + sz_l*gzl)
-                sigma_zz = sigma_zz + sngl(sx_l*gxl + sy_l*gyl)
+              ! compute G tensor from s . g and add to sigma (not symmetric)
+              sigma_xx = sigma_xx + sy_l*gyl + sz_l*gzl
+              sigma_yy = sigma_yy + sx_l*gxl + sz_l*gzl
+              sigma_zz = sigma_zz + sx_l*gxl + sy_l*gyl
 
-                sigma_xy = sigma_xy - sngl(sx_l * gyl)
-                sigma_yx = sigma_yx - sngl(sy_l * gxl)
+              sigma_xy = sigma_xy - sx_l * gyl
+              sigma_yx = sigma_yx - sy_l * gxl
 
-                sigma_xz = sigma_xz - sngl(sx_l * gzl)
-                sigma_zx = sigma_zx - sngl(sz_l * gxl)
+              sigma_xz = sigma_xz - sx_l * gzl
+              sigma_zx = sigma_zx - sz_l * gxl
 
-                sigma_yz = sigma_yz - sngl(sy_l * gzl)
-                sigma_zy = sigma_zy - sngl(sz_l * gyl)
+              sigma_yz = sigma_yz - sy_l * gzl
+              sigma_zy = sigma_zy - sz_l * gyl
 
-                ! precompute vector
-                factor = dble(jacobianl) * wgll_cube(i,j,k)
-                rho_s_H(1,i,j,k) = sngl(factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl))
-                rho_s_H(2,i,j,k) = sngl(factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl))
-                rho_s_H(3,i,j,k) = sngl(factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl))
+              ! precompute vector
+              factor = jacobianl * wgll_cube(i,j,k)
+              rho_s_H(1,i,j,k) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
+              rho_s_H(2,i,j,k) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
+              rho_s_H(3,i,j,k) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
 
-              else
-
-                ! get displacement and multiply by density to compute G tensor
-                sx_l = rho * displ_inner_core(1,iglob1)
-                sy_l = rho * displ_inner_core(2,iglob1)
-                sz_l = rho * displ_inner_core(3,iglob1)
-
-                ! compute G tensor from s . g and add to sigma (not symmetric)
-                sigma_xx = sigma_xx + sy_l*gyl + sz_l*gzl
-                sigma_yy = sigma_yy + sx_l*gxl + sz_l*gzl
-                sigma_zz = sigma_zz + sx_l*gxl + sy_l*gyl
-
-                sigma_xy = sigma_xy - sx_l * gyl
-                sigma_yx = sigma_yx - sy_l * gxl
-
-                sigma_xz = sigma_xz - sx_l * gzl
-                sigma_zx = sigma_zx - sz_l * gxl
-
-                sigma_yz = sigma_yz - sy_l * gzl
-                sigma_zy = sigma_zy - sz_l * gyl
-
-                ! precompute vector
-                factor = jacobianl * wgll_cube(i,j,k)
-                rho_s_H(1,i,j,k) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
-                rho_s_H(2,i,j,k) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
-                rho_s_H(3,i,j,k) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
-
-              endif
-
             endif  ! end of section with gravity terms
 
             ! form dot product with test vector, non-symmetric form
@@ -736,30 +705,34 @@
         enddo
       enddo
 
+      ! sum contributions
       do k=1,NGLLZ
         do j=1,NGLLY
           fac1 = wgllwgll_yz(j,k)
           do i=1,NGLLX
             fac2 = wgllwgll_xz(i,k)
             fac3 = wgllwgll_xy(i,j)
-
-            ! sum contributions
             sum_terms(1,i,j,k) = - (fac1*newtempx1(i,j,k) + fac2*newtempx2(i,j,k) + fac3*newtempx3(i,j,k))
             sum_terms(2,i,j,k) = - (fac1*newtempy1(i,j,k) + fac2*newtempy2(i,j,k) + fac3*newtempy3(i,j,k))
             sum_terms(3,i,j,k) = - (fac1*newtempz1(i,j,k) + fac2*newtempz2(i,j,k) + fac3*newtempz3(i,j,k))
-
-            if(GRAVITY_VAL) sum_terms(:,i,j,k) = sum_terms(:,i,j,k) + rho_s_H(:,i,j,k)
-
           enddo
         enddo
       enddo
 
-      ! sum contributions from each element to the global mesh and add gravity terms
+      ! add gravity terms
+      if(GRAVITY_VAL) then
+        sum_terms(:,:,:,:) = sum_terms(:,:,:,:) + rho_s_H(:,:,:,:)
+      endif
+
+      ! sum contributions from each element to the global mesh
       do k=1,NGLLZ
         do j=1,NGLLY
           do i=1,NGLLX
-            iglob1 = ibool(i,j,k,ispec)
-            accel_inner_core(:,iglob1) = accel_inner_core(:,iglob1) + sum_terms(:,i,j,k)
+            iglob = ibool(i,j,k,ispec)
+! do NOT use array syntax ":" for the three statements below otherwise most compilers will not be able to vectorize the outer loop
+            accel_inner_core(1,iglob) = accel_inner_core(1,iglob) + sum_terms(1,i,j,k)
+            accel_inner_core(2,iglob) = accel_inner_core(2,iglob) + sum_terms(2,i,j,k)
+            accel_inner_core(3,iglob) = accel_inner_core(3,iglob) + sum_terms(3,i,j,k)
           enddo
         enddo
       enddo
@@ -802,9 +775,9 @@
 
       endif
 
-    endif   ! end test to exclude fictitious elements in central cube
+    endif ! end of test to exclude fictitious elements in central cube
 
-  enddo ! spectral element loop
+  enddo ! of spectral element loop
 
   end subroutine compute_forces_inner_core_Dev
 

Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core_Dev.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core_Dev.f90	2013-07-16 17:18:13 UTC (rev 22628)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core_Dev.f90	2013-07-16 21:55:23 UTC (rev 22629)
@@ -106,7 +106,7 @@
   integer :: i,j,k
   real(kind=CUSTOM_REAL) :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
   real(kind=CUSTOM_REAL) :: dpotentialdxl,dpotentialdyl,dpotentialdzl
-  real(kind=CUSTOM_REAL) :: sum_terms
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: sum_terms
 
   ! manually inline the calls to the Deville et al. (2002) routines
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc
@@ -213,7 +213,7 @@
         do i=1,NGLLX
           iglob = ibool(i,j,k,ispec)
 
-          ! stores "displacement"
+          ! get a local copy of the potential field
           dummyx_loc(i,j,k) = displfluid(iglob)
 
           ! pre-computes factors
@@ -233,11 +233,11 @@
           if( .not. GRAVITY_VAL ) then
             ! grad(rho)/rho in Cartesian components
             displ_times_grad_x_ln_rho(i,j,k) = dummyx_loc(i,j,k) &
-                  * sngl(sin_theta * cos_phi * d_ln_density_dr_table(int_radius))
+                  * sin_theta * cos_phi * d_ln_density_dr_table(int_radius)
             displ_times_grad_y_ln_rho(i,j,k) = dummyx_loc(i,j,k) &
-                  * sngl(sin_theta * sin_phi * d_ln_density_dr_table(int_radius))
+                  * sin_theta * sin_phi * d_ln_density_dr_table(int_radius)
             displ_times_grad_z_ln_rho(i,j,k) = dummyx_loc(i,j,k) &
-                  * sngl(cos_theta * d_ln_density_dr_table(int_radius))
+                  * cos_theta * d_ln_density_dr_table(int_radius)
           else
             ! Cartesian components of the gravitational acceleration
             ! integrate and multiply by rho / Kappa
@@ -388,35 +388,25 @@
             gyl = temp_gyl(i,j,k)
             gzl = temp_gzl(i,j,k)
 
-            ! distinguish between single and double precision for reals
-            if(CUSTOM_REAL == SIZE_REAL) then
-              gravity_term(i,j,k) = &
-                      sngl( minus_rho_g_over_kappa_fluid(int_radius) &
-                      * dble(jacobianl) * wgll_cube(i,j,k) &
-                      * (dble(dpotentialdx_with_rot) * gxl  &
-                         + dble(dpotentialdy_with_rot) * gyl &
-                         + dble(dpotentialdzl) * gzl) )
-            else
-              gravity_term(i,j,k) = minus_rho_g_over_kappa_fluid(int_radius) * &
+            gravity_term(i,j,k) = minus_rho_g_over_kappa_fluid(int_radius) * &
                         jacobianl * wgll_cube(i,j,k) &
                         * (dpotentialdx_with_rot * gxl  &
                           + dpotentialdy_with_rot * gyl &
                           + dpotentialdzl * gzl)
-            endif
 
             if(istage == 1)then
             ! divergence of displacement field with gravity on
             ! note: these calculations are only considered for SIMULATION_TYPE == 1 .and. SAVE_FORWARD
             !          and one has set MOVIE_VOLUME_TYPE == 4 when MOVIE_VOLUME is .true.;
             !         in case of SIMULATION_TYPE == 3, it gets overwritten by compute_kernels_outer_core()
-            if (NSPEC_OUTER_CORE_ADJOINT /= 1 .and. SIMULATION_TYPE == 1 .and. MOVIE_VOLUME) then
-              div_displfluid(i,j,k,ispec) =  &
+              if (NSPEC_OUTER_CORE_ADJOINT /= 1 .and. SIMULATION_TYPE == 1 .and. MOVIE_VOLUME) then
+                div_displfluid(i,j,k,ispec) =  &
                         minus_rho_g_over_kappa_fluid(int_radius) &
                         * (dpotentialdx_with_rot * gxl &
                          + dpotentialdy_with_rot * gyl &
                          + dpotentialdzl * gzl)
+              endif
             endif
-            endif
 
           endif
 
@@ -466,27 +456,34 @@
       enddo
     enddo
 
+    ! sum contributions from each element to the global mesh
     do k=1,NGLLZ
       do j=1,NGLLY
         do i=1,NGLLX
+          sum_terms(i,j,k) = - (wgllwgll_yz(j,k)*newtempx1(i,j,k) &
+                              + wgllwgll_xz(i,k)*newtempx2(i,j,k) &
+                              + wgllwgll_xy(i,j)*newtempx3(i,j,k))
+        enddo
+      enddo
+    enddo
 
-          ! sum contributions from each element to the global mesh and add gravity term
-          sum_terms = - (wgllwgll_yz(j,k)*newtempx1(i,j,k) &
-                       + wgllwgll_xz(i,k)*newtempx2(i,j,k) &
-                       + wgllwgll_xy(i,j)*newtempx3(i,j,k))
+    ! add gravity term
+    if(GRAVITY_VAL) then
+      sum_terms(:,:,:) = sum_terms(:,:,:) + gravity_term(:,:,:)
+    endif
 
-          if(GRAVITY_VAL) sum_terms = sum_terms + gravity_term(i,j,k)
-
+    do k=1,NGLLZ
+      do j=1,NGLLY
+        do i=1,NGLLX
           iglob = ibool(i,j,k,ispec)
-          accelfluid(iglob) = accelfluid(iglob) + sum_terms
-
+          accelfluid(iglob) = accelfluid(iglob) + sum_terms(i,j,k)
         enddo
       enddo
     enddo
 
     ! update rotation term with Euler scheme
     if(ROTATION_VAL) then
-      if(USE_LDDRK)then
+      if(USE_LDDRK) then
         ! use the source saved above
         A_array_rotation_lddrk(:,:,:,ispec) = ALPHA_LDDRK(istage) * A_array_rotation_lddrk(:,:,:,ispec) + source_euler_A(:,:,:)
         A_array_rotation(:,:,:,ispec) = A_array_rotation(:,:,:,ispec) + BETA_LDDRK(istage) * A_array_rotation_lddrk(:,:,:,ispec)
@@ -500,7 +497,7 @@
       endif
     endif
 
-  enddo   ! spectral element loop
+  enddo ! of spectral element loop
 
   end subroutine compute_forces_outer_core_Dev