[cig-commits] r22592 - seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D

dkomati1 at geodynamics.org dkomati1 at geodynamics.org
Sat Jul 13 16:02:59 PDT 2013


Author: dkomati1
Date: 2013-07-13 16:02:59 -0700 (Sat, 13 Jul 2013)
New Revision: 22592

Modified:
   seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_kernels.F90
Log:
got rid of expensive temporary array copies


Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_kernels.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_kernels.F90	2013-07-13 22:47:26 UTC (rev 22591)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_kernels.F90	2013-07-13 23:02:59 UTC (rev 22592)
@@ -63,23 +63,14 @@
 
   ! local parameters
   real(kind=CUSTOM_REAL),dimension(21) :: prod
-  real(kind=CUSTOM_REAL), dimension(5) :: epsilondev_loc
   real(kind=CUSTOM_REAL), dimension(5) :: b_epsilondev_loc
-  real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc_matrix,b_epsilondev_loc_matrix
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: eps_trace_over_3_loc_matrix,&
-                                                          b_eps_trace_over_3_loc_matrix
+  real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: b_epsilondev_loc_matrix
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: b_eps_trace_over_3_loc_matrix
   integer :: i,j,k,ispec,iglob
 
   ! crust_mantle
   do ispec = 1, NSPEC_CRUST_MANTLE
 
-    eps_trace_over_3_loc_matrix(:,:,:) = eps_trace_over_3_crust_mantle(:,:,:,ispec)
-    epsilondev_loc_matrix(1,:,:,:) = epsilondev_crust_mantle(1,:,:,:,ispec)
-    epsilondev_loc_matrix(2,:,:,:) = epsilondev_crust_mantle(2,:,:,:,ispec)
-    epsilondev_loc_matrix(3,:,:,:) = epsilondev_crust_mantle(3,:,:,:,ispec)
-    epsilondev_loc_matrix(4,:,:,:) = epsilondev_crust_mantle(4,:,:,:,ispec)
-    epsilondev_loc_matrix(5,:,:,:) = epsilondev_crust_mantle(5,:,:,:,ispec)
-
 ! in principle there should also probably be a _noDev() call here as well
 ! and a "if(USE_DEVILLE_PRODUCTS_VAL) then" test, but for now it is not implemented
 ! by lack of time (and nobody uses NGLL /= 5 anyway, thus in practice USE_DEVILLE_PRODUCTS_VAL is always true...)
@@ -112,11 +103,10 @@
              + accel_crust_mantle(2,iglob) * b_displ_crust_mantle(2,iglob) &
              + accel_crust_mantle(3,iglob) * b_displ_crust_mantle(3,iglob) )
 
-          epsilondev_loc(:) = epsilondev_loc_matrix(:,i,j,k)
           b_epsilondev_loc(:) = b_epsilondev_loc_matrix(:,i,j,k)
 
           ! For anisotropic kernels
-          call compute_strain_product(prod,eps_trace_over_3_loc_matrix(i,j,k),epsilondev_loc, &
+          call compute_strain_product(prod,eps_trace_over_3_crust_mantle(i,j,k,ispec),epsilondev_crust_mantle(1,i,j,k,ispec), &
                                       b_eps_trace_over_3_loc_matrix(i,j,k),b_epsilondev_loc)
 
           ! do not use a ":" array syntax for the first index below otherwise
@@ -172,21 +162,23 @@
              + accel_crust_mantle(2,iglob) * b_displ_crust_mantle(2,iglob) &
              + accel_crust_mantle(3,iglob) * b_displ_crust_mantle(3,iglob) )
 
-          epsilondev_loc(:) = epsilondev_loc_matrix(:,i,j,k)
           b_epsilondev_loc(:) = b_epsilondev_loc_matrix(:,i,j,k)
 
           ! kernel for shear modulus, see e.g. Tromp et al. (2005), equation (17)
           ! note: multiplication with 2*mu(x) will be done after the time loop
           beta_kl_crust_mantle(i,j,k,ispec) =  beta_kl_crust_mantle(i,j,k,ispec) &
-             + deltat * (epsilondev_loc(1)*b_epsilondev_loc(1) + epsilondev_loc(2)*b_epsilondev_loc(2) &
-             + (epsilondev_loc(1)+epsilondev_loc(2)) * (b_epsilondev_loc(1)+b_epsilondev_loc(2)) &
-             + 2.d0 * (epsilondev_loc(3)*b_epsilondev_loc(3) + epsilondev_loc(4)*b_epsilondev_loc(4) + &
-              epsilondev_loc(5)*b_epsilondev_loc(5)))
+             + deltat * (epsilondev_crust_mantle(1,i,j,k,ispec)*b_epsilondev_loc(1) &
+             + epsilondev_crust_mantle(2,i,j,k,ispec)*b_epsilondev_loc(2) &
+             + (epsilondev_crust_mantle(1,i,j,k,ispec)+epsilondev_crust_mantle(2,i,j,k,ispec)) &
+             * (b_epsilondev_loc(1)+b_epsilondev_loc(2)) &
+             + 2.d0 * (epsilondev_crust_mantle(3,i,j,k,ispec)*b_epsilondev_loc(3) &
+             + epsilondev_crust_mantle(4,i,j,k,ispec)*b_epsilondev_loc(4) + &
+              epsilondev_crust_mantle(5,i,j,k,ispec)*b_epsilondev_loc(5)))
 
           ! kernel for bulk modulus, see e.g. Tromp et al. (2005), equation (18)
           ! note: multiplication with kappa(x) will be done after the time loop
           alpha_kl_crust_mantle(i,j,k,ispec) = alpha_kl_crust_mantle(i,j,k,ispec) &
-             + deltat * (9.d0 * eps_trace_over_3_loc_matrix(i,j,k) &
+             + deltat * (9.d0 * eps_trace_over_3_crust_mantle(i,j,k,ispec) &
                               * b_eps_trace_over_3_loc_matrix(i,j,k))
 
         enddo
@@ -642,9 +634,8 @@
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: eps_trace_over_3_inner_core 
 
   ! local parameters
-  real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc_matrix,b_epsilondev_loc_matrix
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: eps_trace_over_3_loc_matrix,&
-                                                          b_eps_trace_over_3_loc_matrix
+  real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: b_epsilondev_loc_matrix
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: b_eps_trace_over_3_loc_matrix
 
   integer :: ispec,iglob
 
@@ -657,13 +648,6 @@
   ! inner_core
   do ispec = 1, NSPEC_INNER_CORE
 
-    eps_trace_over_3_loc_matrix(:,:,:) = eps_trace_over_3_inner_core(:,:,:,ispec)
-    epsilondev_loc_matrix(1,:,:,:) = epsilondev_inner_core(1,:,:,:,ispec)
-    epsilondev_loc_matrix(2,:,:,:) = epsilondev_inner_core(2,:,:,:,ispec)
-    epsilondev_loc_matrix(3,:,:,:) = epsilondev_inner_core(3,:,:,:,ispec)
-    epsilondev_loc_matrix(4,:,:,:) = epsilondev_inner_core(4,:,:,:,ispec)
-    epsilondev_loc_matrix(5,:,:,:) = epsilondev_inner_core(5,:,:,:,ispec)
-
     call compute_element_strain_undo_att_Dev(ispec,NGLOB_inner_core,NSPEC_inner_core,&
                                              b_displ_inner_core,ibool_inner_core,hprime_xx,hprime_xxT,&
                                              xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz,&
@@ -679,16 +663,16 @@
              + accel_inner_core(3,iglob) * b_displ_inner_core(3,iglob))
 
           beta_kl_inner_core(ijk,1,1,ispec) =  beta_kl_inner_core(ijk,1,1,ispec) &
-             + deltat * (epsilondev_loc_matrix(1,ijk,1,1)*b_epsilondev_loc_matrix(1,ijk,1,1) &
-                + epsilondev_loc_matrix(2,ijk,1,1)*b_epsilondev_loc_matrix(2,ijk,1,1) &
-                + (epsilondev_loc_matrix(1,ijk,1,1)+epsilondev_loc_matrix(2,ijk,1,1)) * &
+             + deltat * (epsilondev_inner_core(1,ijk,1,1,ispec)*b_epsilondev_loc_matrix(1,ijk,1,1) &
+                + epsilondev_inner_core(2,ijk,1,1,ispec)*b_epsilondev_loc_matrix(2,ijk,1,1) &
+                + (epsilondev_inner_core(1,ijk,1,1,ispec)+epsilondev_inner_core(2,ijk,1,1,ispec)) * &
                   (b_epsilondev_loc_matrix(1,ijk,1,1)+b_epsilondev_loc_matrix(2,ijk,1,1)) &
-                + 2.d0 * (epsilondev_loc_matrix(3,ijk,1,1)*b_epsilondev_loc_matrix(3,ijk,1,1) &
-                + epsilondev_loc_matrix(4,ijk,1,1)*b_epsilondev_loc_matrix(4,ijk,1,1) &
-                + epsilondev_loc_matrix(5,ijk,1,1)*b_epsilondev_loc_matrix(5,ijk,1,1)))
+                + 2.d0 * (epsilondev_inner_core(3,ijk,1,1,ispec)*b_epsilondev_loc_matrix(3,ijk,1,1) &
+                + epsilondev_inner_core(4,ijk,1,1,ispec)*b_epsilondev_loc_matrix(4,ijk,1,1) &
+                + epsilondev_inner_core(5,ijk,1,1,ispec)*b_epsilondev_loc_matrix(5,ijk,1,1)))
 
           alpha_kl_inner_core(ijk,1,1,ispec) = alpha_kl_inner_core(ijk,1,1,ispec) &
-                + deltat * (9.d0 * eps_trace_over_3_loc_matrix(ijk,1,1) * b_eps_trace_over_3_loc_matrix(ijk,1,1))
+                + deltat * (9.d0 * eps_trace_over_3_inner_core(ijk,1,1,ispec) * b_eps_trace_over_3_loc_matrix(ijk,1,1))
         enddo
 #else
     do k = 1, NGLLZ
@@ -702,16 +686,16 @@
              + accel_inner_core(3,iglob) * b_displ_inner_core(3,iglob))
 
           beta_kl_inner_core(i,j,k,ispec) =  beta_kl_inner_core(i,j,k,ispec) &
-             + deltat * (epsilondev_loc_matrix(1,i,j,k)*b_epsilondev_loc_matrix(1,i,j,k) &
-                + epsilondev_loc_matrix(2,i,j,k)*b_epsilondev_loc_matrix(2,i,j,k) &
-                + (epsilondev_loc_matrix(1,i,j,k)+epsilondev_loc_matrix(2,i,j,k)) * &
+             + deltat * (epsilondev_inner_core(1,i,j,k,ispec)*b_epsilondev_loc_matrix(1,i,j,k) &
+                + epsilondev_inner_core(2,i,j,k,ispec)*b_epsilondev_loc_matrix(2,i,j,k) &
+                + (epsilondev_inner_core(1,i,j,k,ispec)+epsilondev_inner_core(2,i,j,k,ispec)) * &
                   (b_epsilondev_loc_matrix(1,i,j,k)+b_epsilondev_loc_matrix(2,i,j,k)) &
-                + 2.d0 * (epsilondev_loc_matrix(3,i,j,k)*b_epsilondev_loc_matrix(3,i,j,k) &
-                + epsilondev_loc_matrix(4,i,j,k)*b_epsilondev_loc_matrix(4,i,j,k) &
-                + epsilondev_loc_matrix(5,i,j,k)*b_epsilondev_loc_matrix(5,i,j,k)))
+                + 2.d0 * (epsilondev_inner_core(3,i,j,k,ispec)*b_epsilondev_loc_matrix(3,i,j,k) &
+                + epsilondev_inner_core(4,i,j,k,ispec)*b_epsilondev_loc_matrix(4,i,j,k) &
+                + epsilondev_inner_core(5,i,j,k,ispec)*b_epsilondev_loc_matrix(5,i,j,k)))
 
           alpha_kl_inner_core(i,j,k,ispec) = alpha_kl_inner_core(i,j,k,ispec) &
-                + deltat * (9.d0 * eps_trace_over_3_loc_matrix(i,j,k) * b_eps_trace_over_3_loc_matrix(i,j,k))
+                + deltat * (9.d0 * eps_trace_over_3_inner_core(i,j,k,ispec) * b_eps_trace_over_3_loc_matrix(i,j,k))
         enddo
       enddo
     enddo



More information about the CIG-COMMITS mailing list