[cig-commits] r22636 - seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D

Wed Jul 17 08:37:53 PDT 2013

Author: dkomati1
Date: 2013-07-17 08:37:53 -0700 (Wed, 17 Jul 2013)
New Revision: 22636

Modified:
   seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90
   seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90
   seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core_Dev.F90
Log:
vectorized more loops in compute_forces()


Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90
===================================================================

--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90	2013-07-17 12:59:27 UTC (rev 22635)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90	2013-07-17 15:37:53 UTC (rev 22636)
@@ -274,7 +274,7 @@
 
     endif
 
-    iend = min(ispec_glob+ELEMENTS_NONBLOCKING_CM_IC-1,NSPEC_CRUST_MANTLE)
+    iend = min(ispec_glob + ELEMENTS_NONBLOCKING_CM_IC - 1, NSPEC_CRUST_MANTLE)
 
     do ispec = ispec_glob,iend
 
@@ -379,7 +379,6 @@
     !
     if(ANISOTROPIC_3D_MANTLE_VAL) then
        ! anisotropic element
-
        call compute_element_aniso(ispec, &
             minus_gravity_table,density_table,minus_deriv_gravity_table, &
             xstore,ystore,zstore, &
@@ -394,7 +393,7 @@
             tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
             dummyx_loc,dummyy_loc,dummyz_loc,epsilondev_loc,eps_trace_over_3(1,1,1,ispec),rho_s_H)
     else
-       if( .not. ispec_is_tiso(ispec) ) then
+       if(.not. ispec_is_tiso(ispec)) then
           ! isotropic element
           call compute_element_iso(ispec, &
                minus_gravity_table,density_table,minus_deriv_gravity_table, &
@@ -409,7 +408,6 @@
                dummyx_loc,dummyy_loc,dummyz_loc,epsilondev_loc,eps_trace_over_3(1,1,1,ispec),rho_s_H)
        else
           ! transverse isotropic element
-
           call compute_element_tiso(ispec, &
                minus_gravity_table,density_table,minus_deriv_gravity_table, &
                xstore,ystore,zstore, &
@@ -421,7 +419,7 @@
                one_minus_sum_beta,vnspec, &
                tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
                dummyx_loc,dummyy_loc,dummyz_loc,epsilondev_loc,eps_trace_over_3(1,1,1,ispec),rho_s_H)
-       endif ! .not. ispec_is_tiso
+       endif
     endif
 
     ! subroutines adapted from Deville, Fischer and Mund, High-order methods

Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90	2013-07-17 12:59:27 UTC (rev 22635)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90	2013-07-17 15:37:53 UTC (rev 22636)
@@ -392,6 +392,260 @@
          enddo
       enddo
 
+#ifdef FORCE_VECTORIZATION
+          do ijk=1,NGLLCUBE
+
+            ! get derivatives of ux, uy and uz with respect to x, y and z
+            xixl = xix(ijk,1,1,ispec)
+            xiyl = xiy(ijk,1,1,ispec)
+            xizl = xiz(ijk,1,1,ispec)
+            etaxl = etax(ijk,1,1,ispec)
+            etayl = etay(ijk,1,1,ispec)
+            etazl = etaz(ijk,1,1,ispec)
+            gammaxl = gammax(ijk,1,1,ispec)
+            gammayl = gammay(ijk,1,1,ispec)
+            gammazl = gammaz(ijk,1,1,ispec)
+
+            ! compute the jacobian
+            jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
+                          - xiyl*(etaxl*gammazl-etazl*gammaxl) &
+                          + xizl*(etaxl*gammayl-etayl*gammaxl))
+
+            duxdxl = xixl*tempx1(ijk,1,1) + etaxl*tempx2(ijk,1,1) + gammaxl*tempx3(ijk,1,1)
+            duxdyl = xiyl*tempx1(ijk,1,1) + etayl*tempx2(ijk,1,1) + gammayl*tempx3(ijk,1,1)
+            duxdzl = xizl*tempx1(ijk,1,1) + etazl*tempx2(ijk,1,1) + gammazl*tempx3(ijk,1,1)
+
+            duydxl = xixl*tempy1(ijk,1,1) + etaxl*tempy2(ijk,1,1) + gammaxl*tempy3(ijk,1,1)
+            duydyl = xiyl*tempy1(ijk,1,1) + etayl*tempy2(ijk,1,1) + gammayl*tempy3(ijk,1,1)
+            duydzl = xizl*tempy1(ijk,1,1) + etazl*tempy2(ijk,1,1) + gammazl*tempy3(ijk,1,1)
+
+            duzdxl = xixl*tempz1(ijk,1,1) + etaxl*tempz2(ijk,1,1) + gammaxl*tempz3(ijk,1,1)
+            duzdyl = xiyl*tempz1(ijk,1,1) + etayl*tempz2(ijk,1,1) + gammayl*tempz3(ijk,1,1)
+            duzdzl = xizl*tempz1(ijk,1,1) + etazl*tempz2(ijk,1,1) + gammazl*tempz3(ijk,1,1)
+
+            ! precompute some sums to save CPU time
+            duxdxl_plus_duydyl = duxdxl + duydyl
+            duxdxl_plus_duzdzl = duxdxl + duzdzl
+            duydyl_plus_duzdzl = duydyl + duzdzl
+            duxdyl_plus_duydxl = duxdyl + duydxl
+            duzdxl_plus_duxdzl = duzdxl + duxdzl
+            duzdyl_plus_duydzl = duzdyl + duydzl
+
+            if (COMPUTE_AND_STORE_STRAIN_VAL) then
+              templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
+              epsilondev_loc(1,ijk,1,1) = duxdxl - templ
+              epsilondev_loc(2,ijk,1,1) = duydyl - templ
+              epsilondev_loc(3,ijk,1,1) = 0.5 * duxdyl_plus_duydxl
+              epsilondev_loc(4,ijk,1,1) = 0.5 * duzdxl_plus_duxdzl
+              epsilondev_loc(5,ijk,1,1) = 0.5 * duzdyl_plus_duydzl
+              eps_trace_over_3(ijk,1,1,ispec) = templ
+            endif
+
+            if(ATTENUATION_3D_VAL) then
+              minus_sum_beta =  one_minus_sum_beta(ijk,1,1,ispec) - 1.0
+            else if(ATTENUATION_VAL) then
+              minus_sum_beta =  one_minus_sum_beta(1,1,1,ispec) - 1.0
+            endif
+
+            if(ANISOTROPIC_INNER_CORE_VAL) then
+              ! elastic tensor for hexagonal symmetry in reduced notation:
+              !
+              !      c11 c12 c13  0   0        0
+              !      c12 c11 c13  0   0        0
+              !      c13 c13 c33  0   0        0
+              !       0   0   0  c44  0        0
+              !       0   0   0   0  c44       0
+              !       0   0   0   0   0  (c11-c12)/2
+              !
+              !       in terms of the A, C, L, N and F of Love (1927):
+              !
+              !       c11 = A
+              !       c12 = A-2N
+              !       c13 = F
+              !       c33 = C
+              !       c44 = L
+              c11l = c11store(ijk,1,1,ispec)
+              c12l = c12store(ijk,1,1,ispec)
+              c13l = c13store(ijk,1,1,ispec)
+              c33l = c33store(ijk,1,1,ispec)
+              c44l = c44store(ijk,1,1,ispec)
+
+              ! use unrelaxed parameters if attenuation
+              if(ATTENUATION_VAL) then
+                mul = muvstore(ijk,1,1,ispec)
+                c11l = c11l + FOUR_THIRDS * minus_sum_beta * mul
+                c12l = c12l - TWO_THIRDS * minus_sum_beta * mul
+                c13l = c13l - TWO_THIRDS * minus_sum_beta * mul
+                c33l = c33l + FOUR_THIRDS * minus_sum_beta * mul
+                c44l = c44l + minus_sum_beta * mul
+              endif
+
+              sigma_xx = c11l*duxdxl + c12l*duydyl + c13l*duzdzl
+              sigma_yy = c12l*duxdxl + c11l*duydyl + c13l*duzdzl
+              sigma_zz = c13l*duxdxl + c13l*duydyl + c33l*duzdzl
+              sigma_xy = 0.5*(c11l-c12l)*duxdyl_plus_duydxl
+              sigma_xz = c44l*duzdxl_plus_duxdzl
+              sigma_yz = c44l*duzdyl_plus_duydzl
+            else
+
+              ! inner core with no anisotropy, use kappav and muv for instance
+              ! layer with no anisotropy, use kappav and muv for instance
+              kappal = kappavstore(ijk,1,1,ispec)
+              mul = muvstore(ijk,1,1,ispec)
+
+              ! use unrelaxed parameters if attenuation
+              if(ATTENUATION_3D_VAL) then
+                mul = mul * one_minus_sum_beta(ijk,1,1,ispec)
+              else if(ATTENUATION_VAL) then
+                mul = mul * one_minus_sum_beta(1,1,1,ispec)
+              endif
+
+              lambdalplus2mul = kappal + FOUR_THIRDS * mul
+              lambdal = lambdalplus2mul - 2.*mul
+
+              ! compute stress sigma
+              sigma_xx = lambdalplus2mul*duxdxl + lambdal*duydyl_plus_duzdzl
+              sigma_yy = lambdalplus2mul*duydyl + lambdal*duxdxl_plus_duzdzl
+              sigma_zz = lambdalplus2mul*duzdzl + lambdal*duxdxl_plus_duydyl
+
+              sigma_xy = mul*duxdyl_plus_duydxl
+              sigma_xz = mul*duzdxl_plus_duxdzl
+              sigma_yz = mul*duzdyl_plus_duydzl
+
+            endif
+
+            ! subtract memory variables if attenuation
+            if(ATTENUATION_VAL .and. .not. PARTIAL_PHYS_DISPERSION_ONLY_VAL) then
+! do NOT put this is a subroutine, otherwise the call to the subroutine prevents compilers from vectorizing the outer loop
+! here we assume that N_SLS == 3 in order to be able to unroll and suppress the loop in order to vectorize the outer loop
+              R_xx_val = R_memory(1,1,ijk,1,1,ispec)
+              R_yy_val = R_memory(2,1,ijk,1,1,ispec)
+              sigma_xx = sigma_xx - R_xx_val
+              sigma_yy = sigma_yy - R_yy_val
+              sigma_zz = sigma_zz + R_xx_val + R_yy_val
+              sigma_xy = sigma_xy - R_memory(3,1,ijk,1,1,ispec)
+              sigma_xz = sigma_xz - R_memory(4,1,ijk,1,1,ispec)
+              sigma_yz = sigma_yz - R_memory(5,1,ijk,1,1,ispec)
+
+              R_xx_val = R_memory(1,2,ijk,1,1,ispec)
+              R_yy_val = R_memory(2,2,ijk,1,1,ispec)
+              sigma_xx = sigma_xx - R_xx_val
+              sigma_yy = sigma_yy - R_yy_val
+              sigma_zz = sigma_zz + R_xx_val + R_yy_val
+              sigma_xy = sigma_xy - R_memory(3,2,ijk,1,1,ispec)
+              sigma_xz = sigma_xz - R_memory(4,2,ijk,1,1,ispec)
+              sigma_yz = sigma_yz - R_memory(5,2,ijk,1,1,ispec)
+              
+              R_xx_val = R_memory(1,3,ijk,1,1,ispec)
+              R_yy_val = R_memory(2,3,ijk,1,1,ispec)
+              sigma_xx = sigma_xx - R_xx_val
+              sigma_yy = sigma_yy - R_yy_val
+              sigma_zz = sigma_zz + R_xx_val + R_yy_val
+              sigma_xy = sigma_xy - R_memory(3,3,ijk,1,1,ispec)
+              sigma_xz = sigma_xz - R_memory(4,3,ijk,1,1,ispec)
+              sigma_yz = sigma_yz - R_memory(5,3,ijk,1,1,ispec)
+            endif
+
+            ! define symmetric components of sigma for gravity
+            sigma_yx = sigma_xy
+            sigma_zx = sigma_xz
+            sigma_zy = sigma_yz
+
+            ! compute non-symmetric terms for gravity
+            if(GRAVITY_VAL) then
+
+              ! use mesh coordinates to get theta and phi
+              ! x y and z contain r theta and phi
+              iglob = ibool(ijk,1,1,ispec)
+              radius = dble(xstore(iglob))
+              theta = dble(ystore(iglob))
+              phi = dble(zstore(iglob))
+
+              ! make sure radius is never zero even for points at center of cube
+              ! because we later divide by radius
+              if(radius < 100.d0 / R_EARTH) radius = 100.d0 / R_EARTH
+
+              cos_theta = dcos(theta)
+              sin_theta = dsin(theta)
+              cos_phi = dcos(phi)
+              sin_phi = dsin(phi)
+
+              cos_theta_sq = cos_theta**2
+              sin_theta_sq = sin_theta**2
+              cos_phi_sq = cos_phi**2
+              sin_phi_sq = sin_phi**2
+
+              ! get g, rho and dg/dr=dg
+              ! spherical components of the gravitational acceleration
+              ! for efficiency replace with lookup table every 100 m in radial direction
+              ! make sure we never use zero for point exactly at the center of the Earth
+              int_radius = max(1,nint(radius * R_EARTH_KM * 10.d0))
+              minus_g = minus_gravity_table(int_radius)
+              minus_dg = minus_deriv_gravity_table(int_radius)
+              rho = density_table(int_radius)
+
+              ! Cartesian components of the gravitational acceleration
+              gxl = minus_g*sin_theta*cos_phi
+              gyl = minus_g*sin_theta*sin_phi
+              gzl = minus_g*cos_theta
+
+              ! Cartesian components of gradient of gravitational acceleration
+              ! obtained from spherical components
+              minus_g_over_radius = minus_g / radius
+              minus_dg_plus_g_over_radius = minus_dg - minus_g_over_radius
+
+              Hxxl = minus_g_over_radius*(cos_phi_sq*cos_theta_sq + sin_phi_sq) + cos_phi_sq*minus_dg*sin_theta_sq
+              Hyyl = minus_g_over_radius*(cos_phi_sq + cos_theta_sq*sin_phi_sq) + minus_dg*sin_phi_sq*sin_theta_sq
+              Hzzl = cos_theta_sq*minus_dg + minus_g_over_radius*sin_theta_sq
+              Hxyl = cos_phi*minus_dg_plus_g_over_radius*sin_phi*sin_theta_sq
+              Hxzl = cos_phi*cos_theta*minus_dg_plus_g_over_radius*sin_theta
+              Hyzl = cos_theta*minus_dg_plus_g_over_radius*sin_phi*sin_theta
+
+              ! for locality principle, we set iglob again, in order to have it in the cache again
+              iglob = ibool(ijk,1,1,ispec)
+
+              ! get displacement and multiply by density to compute G tensor
+              sx_l = rho * displ_inner_core(1,iglob)
+              sy_l = rho * displ_inner_core(2,iglob)
+              sz_l = rho * displ_inner_core(3,iglob)
+
+              ! compute G tensor from s . g and add to sigma (not symmetric)
+              sigma_xx = sigma_xx + sy_l*gyl + sz_l*gzl
+              sigma_yy = sigma_yy + sx_l*gxl + sz_l*gzl
+              sigma_zz = sigma_zz + sx_l*gxl + sy_l*gyl
+
+              sigma_xy = sigma_xy - sx_l * gyl
+              sigma_yx = sigma_yx - sy_l * gxl
+
+              sigma_xz = sigma_xz - sx_l * gzl
+              sigma_zx = sigma_zx - sz_l * gxl
+
+              sigma_yz = sigma_yz - sy_l * gzl
+              sigma_zy = sigma_zy - sz_l * gyl
+
+              ! precompute vector
+              factor = jacobianl * wgll_cube(ijk,1,1)
+              rho_s_H(1,ijk,1,1) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
+              rho_s_H(2,ijk,1,1) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
+              rho_s_H(3,ijk,1,1) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
+
+            endif  ! end of section with gravity terms
+
+            ! form dot product with test vector, non-symmetric form
+            tempx1(ijk,1,1) = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl) ! this goes to accel_x
+            tempy1(ijk,1,1) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl) ! this goes to accel_y
+            tempz1(ijk,1,1) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl) ! this goes to accel_z
+
+            tempx2(ijk,1,1) = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl) ! this goes to accel_x
+            tempy2(ijk,1,1) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl) ! this goes to accel_y
+            tempz2(ijk,1,1) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl) ! this goes to accel_z
+
+            tempx3(ijk,1,1) = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl) ! this goes to accel_x
+            tempy3(ijk,1,1) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl) ! this goes to accel_y
+            tempz3(ijk,1,1) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl) ! this goes to accel_z
+
+          enddo
+#else
       do k=1,NGLLZ
         do j=1,NGLLY
           do i=1,NGLLX
@@ -631,6 +885,7 @@
           enddo
         enddo
       enddo
+#endif
 
       ! subroutines adapted from Deville, Fischer and Mund, High-order methods
       ! for incompressible fluid flow, Cambridge University Press (2002),

Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core_Dev.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core_Dev.F90	2013-07-17 12:59:27 UTC (rev 22635)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core_Dev.F90	2013-07-17 15:37:53 UTC (rev 22636)
@@ -327,7 +327,138 @@
       enddo
     enddo
 
+#ifdef FORCE_VECTORIZATION
+        do ijk=1,NGLLCUBE
 
+          ! get derivatives of velocity potential with respect to x, y and z
+          xixl = xix(ijk,1,1,ispec)
+          xiyl = xiy(ijk,1,1,ispec)
+          xizl = xiz(ijk,1,1,ispec)
+          etaxl = etax(ijk,1,1,ispec)
+          etayl = etay(ijk,1,1,ispec)
+          etazl = etaz(ijk,1,1,ispec)
+          gammaxl = gammax(ijk,1,1,ispec)
+          gammayl = gammay(ijk,1,1,ispec)
+          gammazl = gammaz(ijk,1,1,ispec)
+
+          ! compute the jacobian
+          jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
+                        - xiyl*(etaxl*gammazl-etazl*gammaxl) &
+                        + xizl*(etaxl*gammayl-etayl*gammaxl))
+
+          dpotentialdxl = xixl*tempx1(ijk,1,1) + etaxl*tempx2(ijk,1,1) + gammaxl*tempx3(ijk,1,1)
+          dpotentialdyl = xiyl*tempx1(ijk,1,1) + etayl*tempx2(ijk,1,1) + gammayl*tempx3(ijk,1,1)
+          dpotentialdzl = xizl*tempx1(ijk,1,1) + etazl*tempx2(ijk,1,1) + gammazl*tempx3(ijk,1,1)
+
+          ! compute contribution of rotation and add to gradient of potential
+          ! this term has no Z component
+          if(ROTATION_VAL) then
+
+            ! store the source for the Euler scheme for A_rotation and B_rotation
+            two_omega_deltat = deltat * two_omega_earth
+
+            cos_two_omega_t = cos(two_omega_earth*time)
+            sin_two_omega_t = sin(two_omega_earth*time)
+
+            ! time step deltat of Euler scheme is included in the source
+            source_euler_A(ijk,1,1) = two_omega_deltat &
+                  * (cos_two_omega_t * dpotentialdyl + sin_two_omega_t * dpotentialdxl)
+            source_euler_B(ijk,1,1) = two_omega_deltat &
+                  * (sin_two_omega_t * dpotentialdyl - cos_two_omega_t * dpotentialdxl)
+
+            A_rotation = A_array_rotation(ijk,1,1,ispec)
+            B_rotation = B_array_rotation(ijk,1,1,ispec)
+
+            ux_rotation =   A_rotation*cos_two_omega_t + B_rotation*sin_two_omega_t
+            uy_rotation = - A_rotation*sin_two_omega_t + B_rotation*cos_two_omega_t
+
+            dpotentialdx_with_rot = dpotentialdxl + ux_rotation
+            dpotentialdy_with_rot = dpotentialdyl + uy_rotation
+
+          else
+
+            dpotentialdx_with_rot = dpotentialdxl
+            dpotentialdy_with_rot = dpotentialdyl
+
+          endif  ! end of section with rotation
+
+          ! add (chi/rho)grad(rho) term in no gravity case
+          if(.not. GRAVITY_VAL) then
+
+            ! With regards to the non-gravitating case: we cannot set N^2 = 0 *and* let g = 0.
+            ! We can *either* assume N^2 = 0 but keep gravity g, *or* we can assume that gravity
+            ! is negligible to begin with, as in our GJI 2002a, in which case N does not arise.
+            ! We get:
+            !
+            ! \ddot\chi = \rho^{-1}\kappa\bdel\cdot(\bdel\chi+\chi\bdel\ln\rho)
+            !
+            ! Then the displacement is
+            !
+            ! \bu = \bdel\chi+\chi\bdel\ln\rho = \rho^{-1}\bdel(\rho\chi)
+            !
+            ! and the pressure is
+            !
+            ! p = -\rho\ddot{\chi}
+            !
+            ! Thus in our 2002b GJI paper eqn (21) is wrong, and equation (41)
+            ! in our AGU monograph is incorrect; these equations should be replaced by
+            !
+            ! \ddot\chi = \rho^{-1}\kappa\bdel\cdot(\bdel\chi+\chi\bdel\ln\rho)
+            !
+            ! Note that the fluid potential we use in GJI 2002a differs from the one used here:
+            !
+            ! \chi_GJI2002a = \rho\partial\t\chi
+            !
+            ! such that
+            !
+            ! \bv = \partial_t\bu=\rho^{-1}\bdel\chi_GJI2002a  (GJI 2002a eqn 20)
+            !
+            ! p = - \partial_t\chi_GJI2002a (GJI 2002a eqn 19)
+
+            ! use mesh coordinates to get theta and phi
+            ! x y z contain r theta phi
+            dpotentialdx_with_rot = dpotentialdx_with_rot + displ_times_grad_x_ln_rho(ijk,1,1)
+            dpotentialdy_with_rot = dpotentialdy_with_rot + displ_times_grad_y_ln_rho(ijk,1,1)
+            dpotentialdzl = dpotentialdzl + displ_times_grad_z_ln_rho(ijk,1,1)
+
+         else  ! if gravity is turned on
+
+            ! compute divergence of displacment
+            gxl = temp_gxl(ijk,1,1)
+            gyl = temp_gyl(ijk,1,1)
+            gzl = temp_gzl(ijk,1,1)
+
+            gravity_term(ijk,1,1) = minus_rho_g_over_kappa_fluid(int_radius) * &
+                        jacobianl * wgll_cube(ijk,1,1) &
+                        * (dpotentialdx_with_rot * gxl  &
+                          + dpotentialdy_with_rot * gyl &
+                          + dpotentialdzl * gzl)
+
+            if(istage == 1)then
+            ! divergence of displacement field with gravity on
+            ! note: these calculations are only considered for SIMULATION_TYPE == 1 .and. SAVE_FORWARD
+            !          and one has set MOVIE_VOLUME_TYPE == 4 when MOVIE_VOLUME is .true.;
+            !         in case of SIMULATION_TYPE == 3, it gets overwritten by compute_kernels_outer_core()
+              if (NSPEC_OUTER_CORE_ADJOINT /= 1 .and. SIMULATION_TYPE == 1 .and. MOVIE_VOLUME) then
+                div_displfluid(ijk,1,1,ispec) =  &
+                        minus_rho_g_over_kappa_fluid(int_radius) &
+                        * (dpotentialdx_with_rot * gxl &
+                         + dpotentialdy_with_rot * gyl &
+                         + dpotentialdzl * gzl)
+              endif
+            endif
+
+          endif
+
+          tempx1(ijk,1,1) = jacobianl*(xixl*dpotentialdx_with_rot &
+                                   + xiyl*dpotentialdy_with_rot + xizl*dpotentialdzl)
+          tempx2(ijk,1,1) = jacobianl*(etaxl*dpotentialdx_with_rot &
+                                   + etayl*dpotentialdy_with_rot + etazl*dpotentialdzl)
+          tempx3(ijk,1,1) = jacobianl*(gammaxl*dpotentialdx_with_rot &
+                                   + gammayl*dpotentialdy_with_rot + gammazl*dpotentialdzl)
+
+        enddo
+#else
     do k=1,NGLLZ
       do j=1,NGLLY
         do i=1,NGLLX
@@ -462,6 +593,7 @@
         enddo
       enddo
     enddo
+#endif
 
     ! subroutines adapted from Deville, Fischer and Mund, High-order methods
     ! for incompressible fluid flow, Cambridge University Press (2002),