[cig-commits] r19954 - in seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src: cuda specfem3D

joseph.charles at geodynamics.org
Thu Apr 19 04:40:26 PDT 2012


Author: joseph.charles
Date: 2012-04-19 04:40:25 -0700 (Thu, 19 Apr 2012)
New Revision: 19954

Modified:
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_crust_mantle_cuda.cu
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_inner_core_cuda.cu
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_element.F90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle_Dev.F90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core_Dev.F90
Log:
updates routines for attenuation debugging; a first-order Taylor expansion of the displacement is now used to compute the epsilondev_loc variables
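
In short: when attenuation is enabled, the deviatoric strain (the epsilondev_*_loc variables) is no longer computed from the displacement field itself but from a field extrapolated to the current time step with a first-order Taylor expansion, u_att = u + deltat * v. A minimal sketch of that idea in plain C, assuming displacement and velocity are packed as three components per global point (the helper name and packing are illustrative only, not taken from this commit):

    /* hypothetical helper, for illustration only: first-order Taylor expansion
       u(t+dt) ~= u(t) + dt * v(t), the quantity stored in the *_loc_att arrays below */
    static inline void taylor_displ(const float *displ, const float *veloc,
                                    float deltat, int iglob, float u_att[3])
    {
      u_att[0] = displ[3*iglob + 0] + deltat * veloc[3*iglob + 0];
      u_att[1] = displ[3*iglob + 1] + deltat * veloc[3*iglob + 1];
      u_att[2] = displ[3*iglob + 2] + deltat * veloc[3*iglob + 2];
    }

When ATTENUATION is off, the strain is still computed from the plain displacement gradients, which is what the new else branches in the diffs below preserve.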

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_crust_mantle_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_crust_mantle_cuda.cu	2012-04-17 18:47:43 UTC (rev 19953)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_crust_mantle_cuda.cu	2012-04-19 11:40:25 UTC (rev 19954)
@@ -1029,6 +1029,21 @@
       s_dummyy_loc[tx] = d_displ[iglob*3 + 1];
       s_dummyz_loc[tx] = d_displ[iglob*3 + 2];
 #endif
+
+      if( ATTENUATION){
+	// use first order Taylor expansion of displacement for local storage of stresses 
+	// at this current time step, to fix attenuation in a consistent way
+#ifdef USE_TEXTURES
+	s_dummyx_loc_att[tx] = s_dummyx_loc[tx] + d_deltat * tex1Dfetch(tex_veloc, iglob);
+	s_dummyy_loc_att[tx] = s_dummyy_loc[tx] + d_deltat * tex1Dfetch(tex_veloc, iglob + NGLOB);
+	s_dummyz_loc_att[tx] = s_dummyz_loc[tx] + d_deltat * tex1Dfetch(tex_veloc, iglob + 2*NGLOB);
+#else
+	s_dummyx_loc_att[tx] = s_dummyx_loc[tx] + d_deltat * d_veloc[iglob*3];
+	s_dummyy_loc_att[tx] = s_dummyy_loc[tx] + d_deltat * d_veloc[iglob*3 + 1];
+	s_dummyz_loc_att[tx] = s_dummyz_loc[tx] + d_deltat * d_veloc[iglob*3 + 2];
+#endif
+      }
+
     }
 
 // synchronize all the threads (one thread for each of the NGLL grid points of the
@@ -1040,18 +1055,6 @@
 
     if (active) {
 
-      // use first order Taylor expansion of displacement for local storage of stresses 
-      // at this current time step, to fix attenuation in a consistent way
-#ifdef USE_TEXTURES
-      s_dummyx_loc_att[tx] = s_dummyx_loc[tx] + d_deltat * tex1Dfetch(tex_veloc, iglob);
-      s_dummyy_loc_att[tx] = s_dummyy_loc[tx] + d_deltat * tex1Dfetch(tex_veloc, iglob + NGLOB);
-      s_dummyz_loc_att[tx] = s_dummyz_loc[tx] + d_deltat * tex1Dfetch(tex_veloc, iglob + 2*NGLOB);
-#else
-      s_dummyx_loc_att[tx] = s_dummyx_loc[tx] + d_deltat * d_veloc[iglob*3];
-      s_dummyy_loc_att[tx] = s_dummyy_loc[tx] + d_deltat * d_veloc[iglob*3 + 1];
-      s_dummyz_loc_att[tx] = s_dummyz_loc[tx] + d_deltat * d_veloc[iglob*3 + 2];
-#endif
-
 #ifndef MANUALLY_UNROLLED_LOOPS
 
       tempx1l = 0.f;
@@ -1087,21 +1090,23 @@
 
       }
 
-      // temporary variables used for fixing attenuation in a consistent way
 
-      tempx1l_att = 0.f;
-      tempx2l_att = 0.f;
-      tempx3l_att = 0.f;
+      if( ATTENUATION){
+	// temporary variables used for fixing attenuation in a consistent way
 
-      tempy1l_att = 0.f;
-      tempy2l_att = 0.f;
-      tempy3l_att = 0.f;
+	tempx1l_att = 0.f;
+	tempx2l_att = 0.f;
+	tempx3l_att = 0.f;
 
-      tempz1l_att = 0.f;
-      tempz2l_att = 0.f;
-      tempz3l_att = 0.f;
+	tempy1l_att = 0.f;
+	tempy2l_att = 0.f;
+	tempy3l_att = 0.f;
 
-      for (l=0;l<NGLLX;l++) {
+	tempz1l_att = 0.f;
+	tempz2l_att = 0.f;
+	tempz3l_att = 0.f;
+
+	for (l=0;l<NGLLX;l++) {
           hp1 = d_hprime_xx[l*NGLLX+I];
           offset = K*NGLL2+J*NGLLX+l;
           tempx1l_att += s_dummyx_loc_att[offset]*hp1;
@@ -1120,6 +1125,7 @@
           tempy3l_att += s_dummyy_loc_att[offset]*hp3;
           tempz3l_att += s_dummyz_loc_att[offset]*hp3;
 
+	}
       }
 #else
 
@@ -1177,61 +1183,63 @@
               + s_dummyz_loc[3*NGLL2+J*NGLLX+I]*d_hprime_xx[3*NGLLX+K]
               + s_dummyz_loc[4*NGLL2+J*NGLLX+I]*d_hprime_xx[4*NGLLX+K];
 
-      // temporary variables used for fixing attenuation in a consistent way
+      if( ATTENUATION){
+	// temporary variables used for fixing attenuation in a consistent way
 
-      tempx1l_att = s_dummyx_loc_att[K*NGLL2+J*NGLLX]*d_hprime_xx[I]
-              + s_dummyx_loc_att[K*NGLL2+J*NGLLX+1]*d_hprime_xx[NGLLX+I]
-              + s_dummyx_loc_att[K*NGLL2+J*NGLLX+2]*d_hprime_xx[2*NGLLX+I]
-              + s_dummyx_loc_att[K*NGLL2+J*NGLLX+3]*d_hprime_xx[3*NGLLX+I]
-              + s_dummyx_loc_att[K*NGLL2+J*NGLLX+4]*d_hprime_xx[4*NGLLX+I];
+	tempx1l_att = s_dummyx_loc_att[K*NGLL2+J*NGLLX]*d_hprime_xx[I]
+	  + s_dummyx_loc_att[K*NGLL2+J*NGLLX+1]*d_hprime_xx[NGLLX+I]
+	  + s_dummyx_loc_att[K*NGLL2+J*NGLLX+2]*d_hprime_xx[2*NGLLX+I]
+	  + s_dummyx_loc_att[K*NGLL2+J*NGLLX+3]*d_hprime_xx[3*NGLLX+I]
+	  + s_dummyx_loc_att[K*NGLL2+J*NGLLX+4]*d_hprime_xx[4*NGLLX+I];
 
-      tempy1l_att = s_dummyy_loc_att[K*NGLL2+J*NGLLX]*d_hprime_xx[I]
-              + s_dummyy_loc_att[K*NGLL2+J*NGLLX+1]*d_hprime_xx[NGLLX+I]
-              + s_dummyy_loc_att[K*NGLL2+J*NGLLX+2]*d_hprime_xx[2*NGLLX+I]
-              + s_dummyy_loc_att[K*NGLL2+J*NGLLX+3]*d_hprime_xx[3*NGLLX+I]
-              + s_dummyy_loc_att[K*NGLL2+J*NGLLX+4]*d_hprime_xx[4*NGLLX+I];
+	tempy1l_att = s_dummyy_loc_att[K*NGLL2+J*NGLLX]*d_hprime_xx[I]
+	  + s_dummyy_loc_att[K*NGLL2+J*NGLLX+1]*d_hprime_xx[NGLLX+I]
+	  + s_dummyy_loc_att[K*NGLL2+J*NGLLX+2]*d_hprime_xx[2*NGLLX+I]
+	  + s_dummyy_loc_att[K*NGLL2+J*NGLLX+3]*d_hprime_xx[3*NGLLX+I]
+	  + s_dummyy_loc_att[K*NGLL2+J*NGLLX+4]*d_hprime_xx[4*NGLLX+I];
 
-      tempz1l_att = s_dummyz_loc_att[K*NGLL2+J*NGLLX]*d_hprime_xx[I]
-              + s_dummyz_loc_att[K*NGLL2+J*NGLLX+1]*d_hprime_xx[NGLLX+I]
-              + s_dummyz_loc_att[K*NGLL2+J*NGLLX+2]*d_hprime_xx[2*NGLLX+I]
-              + s_dummyz_loc_att[K*NGLL2+J*NGLLX+3]*d_hprime_xx[3*NGLLX+I]
-              + s_dummyz_loc_att[K*NGLL2+J*NGLLX+4]*d_hprime_xx[4*NGLLX+I];
+	tempz1l_att = s_dummyz_loc_att[K*NGLL2+J*NGLLX]*d_hprime_xx[I]
+	  + s_dummyz_loc_att[K*NGLL2+J*NGLLX+1]*d_hprime_xx[NGLLX+I]
+	  + s_dummyz_loc_att[K*NGLL2+J*NGLLX+2]*d_hprime_xx[2*NGLLX+I]
+	  + s_dummyz_loc_att[K*NGLL2+J*NGLLX+3]*d_hprime_xx[3*NGLLX+I]
+	  + s_dummyz_loc_att[K*NGLL2+J*NGLLX+4]*d_hprime_xx[4*NGLLX+I];
 
-      tempx2l_att = s_dummyx_loc_att[K*NGLL2+I]*d_hprime_xx[J]
-              + s_dummyx_loc_att[K*NGLL2+NGLLX+I]*d_hprime_xx[NGLLX+J]
-              + s_dummyx_loc_att[K*NGLL2+2*NGLLX+I]*d_hprime_xx[2*NGLLX+J]
-              + s_dummyx_loc_att[K*NGLL2+3*NGLLX+I]*d_hprime_xx[3*NGLLX+J]
-              + s_dummyx_loc_att[K*NGLL2+4*NGLLX+I]*d_hprime_xx[4*NGLLX+J];
+	tempx2l_att = s_dummyx_loc_att[K*NGLL2+I]*d_hprime_xx[J]
+	  + s_dummyx_loc_att[K*NGLL2+NGLLX+I]*d_hprime_xx[NGLLX+J]
+	  + s_dummyx_loc_att[K*NGLL2+2*NGLLX+I]*d_hprime_xx[2*NGLLX+J]
+	  + s_dummyx_loc_att[K*NGLL2+3*NGLLX+I]*d_hprime_xx[3*NGLLX+J]
+	  + s_dummyx_loc_att[K*NGLL2+4*NGLLX+I]*d_hprime_xx[4*NGLLX+J];
 
-      tempy2l_att = s_dummyy_loc_att[K*NGLL2+I]*d_hprime_xx[J]
-              + s_dummyy_loc_att[K*NGLL2+NGLLX+I]*d_hprime_xx[NGLLX+J]
-              + s_dummyy_loc_att[K*NGLL2+2*NGLLX+I]*d_hprime_xx[2*NGLLX+J]
-              + s_dummyy_loc_att[K*NGLL2+3*NGLLX+I]*d_hprime_xx[3*NGLLX+J]
-              + s_dummyy_loc_att[K*NGLL2+4*NGLLX+I]*d_hprime_xx[4*NGLLX+J];
+	tempy2l_att = s_dummyy_loc_att[K*NGLL2+I]*d_hprime_xx[J]
+	  + s_dummyy_loc_att[K*NGLL2+NGLLX+I]*d_hprime_xx[NGLLX+J]
+	  + s_dummyy_loc_att[K*NGLL2+2*NGLLX+I]*d_hprime_xx[2*NGLLX+J]
+	  + s_dummyy_loc_att[K*NGLL2+3*NGLLX+I]*d_hprime_xx[3*NGLLX+J]
+	  + s_dummyy_loc_att[K*NGLL2+4*NGLLX+I]*d_hprime_xx[4*NGLLX+J];
 
-      tempz2l_att = s_dummyz_loc_att[K*NGLL2+I]*d_hprime_xx[J]
-              + s_dummyz_loc_att[K*NGLL2+NGLLX+I]*d_hprime_xx[NGLLX+J]
-              + s_dummyz_loc_att[K*NGLL2+2*NGLLX+I]*d_hprime_xx[2*NGLLX+J]
-              + s_dummyz_loc_att[K*NGLL2+3*NGLLX+I]*d_hprime_xx[3*NGLLX+J]
-              + s_dummyz_loc_att[K*NGLL2+4*NGLLX+I]*d_hprime_xx[4*NGLLX+J];
+	tempz2l_att = s_dummyz_loc_att[K*NGLL2+I]*d_hprime_xx[J]
+	  + s_dummyz_loc_att[K*NGLL2+NGLLX+I]*d_hprime_xx[NGLLX+J]
+	  + s_dummyz_loc_att[K*NGLL2+2*NGLLX+I]*d_hprime_xx[2*NGLLX+J]
+	  + s_dummyz_loc_att[K*NGLL2+3*NGLLX+I]*d_hprime_xx[3*NGLLX+J]
+	  + s_dummyz_loc_att[K*NGLL2+4*NGLLX+I]*d_hprime_xx[4*NGLLX+J];
 
-      tempx3l_att = s_dummyx_loc_att[J*NGLLX+I]*d_hprime_xx[K]
-              + s_dummyx_loc_att[NGLL2+J*NGLLX+I]*d_hprime_xx[NGLLX+K]
-              + s_dummyx_loc_att[2*NGLL2+J*NGLLX+I]*d_hprime_xx[2*NGLLX+K]
-              + s_dummyx_loc_att[3*NGLL2+J*NGLLX+I]*d_hprime_xx[3*NGLLX+K]
-              + s_dummyx_loc_att[4*NGLL2+J*NGLLX+I]*d_hprime_xx[4*NGLLX+K];
+	tempx3l_att = s_dummyx_loc_att[J*NGLLX+I]*d_hprime_xx[K]
+	  + s_dummyx_loc_att[NGLL2+J*NGLLX+I]*d_hprime_xx[NGLLX+K]
+	  + s_dummyx_loc_att[2*NGLL2+J*NGLLX+I]*d_hprime_xx[2*NGLLX+K]
+	  + s_dummyx_loc_att[3*NGLL2+J*NGLLX+I]*d_hprime_xx[3*NGLLX+K]
+	  + s_dummyx_loc_att[4*NGLL2+J*NGLLX+I]*d_hprime_xx[4*NGLLX+K];
 
-      tempy3l_att = s_dummyy_loc_att[J*NGLLX+I]*d_hprime_xx[K]
-              + s_dummyy_loc_att[NGLL2+J*NGLLX+I]*d_hprime_xx[NGLLX+K]
-              + s_dummyy_loc_att[2*NGLL2+J*NGLLX+I]*d_hprime_xx[2*NGLLX+K]
-              + s_dummyy_loc_att[3*NGLL2+J*NGLLX+I]*d_hprime_xx[3*NGLLX+K]
-              + s_dummyy_loc_att[4*NGLL2+J*NGLLX+I]*d_hprime_xx[4*NGLLX+K];
+	tempy3l_att = s_dummyy_loc_att[J*NGLLX+I]*d_hprime_xx[K]
+	  + s_dummyy_loc_att[NGLL2+J*NGLLX+I]*d_hprime_xx[NGLLX+K]
+	  + s_dummyy_loc_att[2*NGLL2+J*NGLLX+I]*d_hprime_xx[2*NGLLX+K]
+	  + s_dummyy_loc_att[3*NGLL2+J*NGLLX+I]*d_hprime_xx[3*NGLLX+K]
+	  + s_dummyy_loc_att[4*NGLL2+J*NGLLX+I]*d_hprime_xx[4*NGLLX+K];
 
-      tempz3l_att = s_dummyz_loc_att[J*NGLLX+I]*d_hprime_xx[K]
-              + s_dummyz_loc_att[NGLL2+J*NGLLX+I]*d_hprime_xx[NGLLX+K]
-              + s_dummyz_loc_att[2*NGLL2+J*NGLLX+I]*d_hprime_xx[2*NGLLX+K]
-              + s_dummyz_loc_att[3*NGLL2+J*NGLLX+I]*d_hprime_xx[3*NGLLX+K]
-              + s_dummyz_loc_att[4*NGLL2+J*NGLLX+I]*d_hprime_xx[4*NGLLX+K];
+	tempz3l_att = s_dummyz_loc_att[J*NGLLX+I]*d_hprime_xx[K]
+	  + s_dummyz_loc_att[NGLL2+J*NGLLX+I]*d_hprime_xx[NGLLX+K]
+	  + s_dummyz_loc_att[2*NGLL2+J*NGLLX+I]*d_hprime_xx[2*NGLLX+K]
+	  + s_dummyz_loc_att[3*NGLL2+J*NGLLX+I]*d_hprime_xx[3*NGLLX+K]
+	  + s_dummyz_loc_att[4*NGLL2+J*NGLLX+I]*d_hprime_xx[4*NGLLX+K];
+      }
 
 #endif
 
@@ -1268,41 +1276,61 @@
       duzdxl_plus_duxdzl = duzdxl + duxdzl;
       duzdyl_plus_duydzl = duzdyl + duydzl;
 
-      // temporary variables used for fixing attenuation in a consistent way
+      if( ATTENUATION){
+	// temporary variables used for fixing attenuation in a consistent way
 
-      duxdxl_att = xixl*tempx1l_att + etaxl*tempx2l_att + gammaxl*tempx3l_att;
-      duxdyl_att = xiyl*tempx1l_att + etayl*tempx2l_att + gammayl*tempx3l_att;
-      duxdzl_att = xizl*tempx1l_att + etazl*tempx2l_att + gammazl*tempx3l_att;
+	duxdxl_att = xixl*tempx1l_att + etaxl*tempx2l_att + gammaxl*tempx3l_att;
+	duxdyl_att = xiyl*tempx1l_att + etayl*tempx2l_att + gammayl*tempx3l_att;
+	duxdzl_att = xizl*tempx1l_att + etazl*tempx2l_att + gammazl*tempx3l_att;
 
-      duydxl_att = xixl*tempy1l_att + etaxl*tempy2l_att + gammaxl*tempy3l_att;
-      duydyl_att = xiyl*tempy1l_att + etayl*tempy2l_att + gammayl*tempy3l_att;
-      duydzl_att = xizl*tempy1l_att + etazl*tempy2l_att + gammazl*tempy3l_att;
+	duydxl_att = xixl*tempy1l_att + etaxl*tempy2l_att + gammaxl*tempy3l_att;
+	duydyl_att = xiyl*tempy1l_att + etayl*tempy2l_att + gammayl*tempy3l_att;
+	duydzl_att = xizl*tempy1l_att + etazl*tempy2l_att + gammazl*tempy3l_att;
 
-      duzdxl_att = xixl*tempz1l_att + etaxl*tempz2l_att + gammaxl*tempz3l_att;
-      duzdyl_att = xiyl*tempz1l_att + etayl*tempz2l_att + gammayl*tempz3l_att;
-      duzdzl_att = xizl*tempz1l_att + etazl*tempz2l_att + gammazl*tempz3l_att;
+	duzdxl_att = xixl*tempz1l_att + etaxl*tempz2l_att + gammaxl*tempz3l_att;
+	duzdyl_att = xiyl*tempz1l_att + etayl*tempz2l_att + gammayl*tempz3l_att;
+	duzdzl_att = xizl*tempz1l_att + etazl*tempz2l_att + gammazl*tempz3l_att;
 
-      // precompute some sums to save CPU time
-      duxdyl_plus_duydxl_att = duxdyl_att + duydxl_att;
-      duzdxl_plus_duxdzl_att = duzdxl_att + duxdzl_att;
-      duzdyl_plus_duydzl_att = duzdyl_att + duydzl_att;
+	// precompute some sums to save CPU time
+	duxdyl_plus_duydxl_att = duxdyl_att + duydxl_att;
+	duzdxl_plus_duxdzl_att = duzdxl_att + duxdzl_att;
+	duzdyl_plus_duydzl_att = duzdyl_att + duydzl_att;
 
-      // computes deviatoric strain attenuation and/or for kernel calculations
-      if(COMPUTE_AND_STORE_STRAIN) {
-	realw templ = 0.33333333333333333333f * (duxdxl_att + duydyl_att + duzdzl_att); // 1./3. = 0.33333
+	// computes deviatoric strain attenuation and/or for kernel calculations
+	if(COMPUTE_AND_STORE_STRAIN) {
+	  realw templ = 0.33333333333333333333f * (duxdxl_att + duydyl_att + duzdzl_att); // 1./3. = 0.33333
 
-        // local storage: stresses at this current time step
-	epsilondev_xx_loc = duxdxl_att - templ;
-        epsilondev_yy_loc = duydyl_att - templ;
-        epsilondev_xy_loc = 0.5f * duxdyl_plus_duydxl_att;
-        epsilondev_xz_loc = 0.5f * duzdxl_plus_duxdzl_att;
-        epsilondev_yz_loc = 0.5f * duzdyl_plus_duydzl_att;
+	  // local storage: stresses at this current time step
+	  epsilondev_xx_loc = duxdxl_att - templ;
+	  epsilondev_yy_loc = duydyl_att - templ;
+	  epsilondev_xy_loc = 0.5f * duxdyl_plus_duydxl_att;
+	  epsilondev_xz_loc = 0.5f * duzdxl_plus_duxdzl_att;
+	  epsilondev_yz_loc = 0.5f * duzdyl_plus_duydzl_att;
 
-        if(NSPEC_CRUST_MANTLE_STRAIN_ONLY == 1) {
-          epsilon_trace_over_3[tx] = templ;
-        }else{
-          epsilon_trace_over_3[tx + working_element*NGLL3] = templ;
-        }
+	  if(NSPEC_CRUST_MANTLE_STRAIN_ONLY == 1) {
+	    epsilon_trace_over_3[tx] = templ;
+	  }else{
+	    epsilon_trace_over_3[tx + working_element*NGLL3] = templ;
+	  }
+	}
+      }else{
+	// computes deviatoric strain attenuation and/or for kernel calculations
+	if(COMPUTE_AND_STORE_STRAIN) {
+	  realw templ = 0.33333333333333333333f * (duxdxl + duydyl + duzdzl); // 1./3. = 0.33333
+
+	  // local storage: stresses at this current time step
+	  epsilondev_xx_loc = duxdxl - templ;
+	  epsilondev_yy_loc = duydyl - templ;
+	  epsilondev_xy_loc = 0.5f * duxdyl_plus_duydxl;
+	  epsilondev_xz_loc = 0.5f * duzdxl_plus_duxdzl;
+	  epsilondev_yz_loc = 0.5f * duzdyl_plus_duydzl;
+
+	  if(NSPEC_CRUST_MANTLE_STRAIN_ONLY == 1) {
+	    epsilon_trace_over_3[tx] = templ;
+	  }else{
+	    epsilon_trace_over_3[tx + working_element*NGLL3] = templ;
+	  }
+	}
       }
 
       // attenuation

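For reference, both branches of the new ATTENUATION test above end in the same deviatoric-strain computation; only the gradient inputs differ (the du*_att gradients of the extrapolated field versus the plain du* gradients). A compact sketch of that shared computation, using a hypothetical helper name and a 3x3 gradient array standing in for the nine individual duxdxl ... duzdzl(_att) variables:

    /* hypothetical helper, not part of the commit: deviatoric strain and
       trace/3 from the nine displacement-gradient components dudx[i][j] = du_i/dx_j */
    static void deviatoric_strain(const float dudx[3][3],
                                  float eps_dev[5], float *trace_over_3)
    {
      float templ = (dudx[0][0] + dudx[1][1] + dudx[2][2]) / 3.0f;
      eps_dev[0] = dudx[0][0] - templ;               /* epsilondev_xx */
      eps_dev[1] = dudx[1][1] - templ;               /* epsilondev_yy */
      eps_dev[2] = 0.5f * (dudx[0][1] + dudx[1][0]); /* epsilondev_xy */
      eps_dev[3] = 0.5f * (dudx[2][0] + dudx[0][2]); /* epsilondev_xz */
      eps_dev[4] = 0.5f * (dudx[2][1] + dudx[1][2]); /* epsilondev_yz */
      *trace_over_3 = templ;
    }

The same pattern is repeated in the inner-core kernel below.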
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_inner_core_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_inner_core_cuda.cu	2012-04-17 18:47:43 UTC (rev 19953)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_inner_core_cuda.cu	2012-04-19 11:40:25 UTC (rev 19954)
@@ -298,8 +298,10 @@
                                         int* d_phase_ispec_inner,
                                         int num_phase_ispec,
                                         int d_iphase,
+                                        realw d_deltat, 
                                         int use_mesh_coloring_gpu,
                                         realw* d_displ,
+					realw* d_veloc,
                                         realw* d_accel,
                                         realw* d_xix, realw* d_xiy, realw* d_xiz,
                                         realw* d_etax, realw* d_etay, realw* d_etaz,
@@ -353,6 +355,11 @@
   reald duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl;
   reald duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl;
   reald duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl;
+
+  reald tempx1l_att,tempx2l_att,tempx3l_att,tempy1l_att,tempy2l_att,tempy3l_att,tempz1l_att,tempz2l_att,tempz3l_att;
+  reald duxdxl_att,duxdyl_att,duxdzl_att,duydxl_att,duydyl_att,duydzl_att,duzdxl_att,duzdyl_att,duzdzl_att;
+  reald duxdyl_plus_duydxl_att,duzdxl_plus_duxdzl_att,duzdyl_plus_duydzl_att;
+
   reald fac1,fac2,fac3;
   reald lambdal,mul,lambdalplus2mul,kappal;
   reald mul_iso,mul_aniso;
@@ -374,6 +381,10 @@
     __shared__ reald s_dummyy_loc[NGLL3];
     __shared__ reald s_dummyz_loc[NGLL3];
 
+    __shared__ reald s_dummyx_loc_att[NGLL3];
+    __shared__ reald s_dummyy_loc_att[NGLL3];
+    __shared__ reald s_dummyz_loc_att[NGLL3];
+
     __shared__ reald s_tempx1[NGLL3];
     __shared__ reald s_tempx2[NGLL3];
     __shared__ reald s_tempx3[NGLL3];
@@ -421,6 +432,21 @@
         s_dummyy_loc[tx] = d_displ[iglob*3 + 1];
         s_dummyz_loc[tx] = d_displ[iglob*3 + 2];
 #endif
+
+	if( ATTENUATION ){
+	  // use first order Taylor expansion of displacement for local storage of stresses 
+	  // at this current time step, to fix attenuation in a consistent way
+#ifdef USE_TEXTURES
+	  s_dummyx_loc_att[tx] = s_dummyx_loc[tx] + d_deltat * tex1Dfetch(tex_veloc, iglob);
+	  s_dummyy_loc_att[tx] = s_dummyy_loc[tx] + d_deltat * tex1Dfetch(tex_veloc, iglob + NGLOB);
+	  s_dummyz_loc_att[tx] = s_dummyz_loc[tx] + d_deltat * tex1Dfetch(tex_veloc, iglob + 2*NGLOB);
+#else
+	  s_dummyx_loc_att[tx] = s_dummyx_loc[tx] + d_deltat * d_veloc[iglob*3];
+	  s_dummyy_loc_att[tx] = s_dummyy_loc[tx] + d_deltat * d_veloc[iglob*3 + 1];
+	  s_dummyz_loc_att[tx] = s_dummyz_loc[tx] + d_deltat * d_veloc[iglob*3 + 2];
+#endif
+	}
+
       }
     }
 
@@ -467,6 +493,43 @@
           tempz3l += s_dummyz_loc[offset]*hp3;
 
       }
+
+      if( ATTENUATION ){
+	// temporary variables used for fixing attenuation in a consistent way
+
+	tempx1l_att = 0.f;
+	tempx2l_att = 0.f;
+	tempx3l_att = 0.f;
+
+	tempy1l_att = 0.f;
+	tempy2l_att = 0.f;
+	tempy3l_att = 0.f;
+
+	tempz1l_att = 0.f;
+	tempz2l_att = 0.f;
+	tempz3l_att = 0.f;
+
+	for (l=0;l<NGLLX;l++) {
+          hp1 = d_hprime_xx[l*NGLLX+I];
+          offset = K*NGLL2+J*NGLLX+l;
+          tempx1l_att += s_dummyx_loc_att[offset]*hp1;
+          tempy1l_att += s_dummyy_loc_att[offset]*hp1;
+          tempz1l_att += s_dummyz_loc_att[offset]*hp1;
+
+          hp2 = d_hprime_xx[l*NGLLX+J];
+          offset = K*NGLL2+l*NGLLX+I;
+          tempx2l_att += s_dummyx_loc_att[offset]*hp2;
+          tempy2l_att += s_dummyy_loc_att[offset]*hp2;
+          tempz2l_att += s_dummyz_loc_att[offset]*hp2;
+
+          hp3 = d_hprime_xx[l*NGLLX+K];
+          offset = l*NGLL2+J*NGLLX+I;
+          tempx3l_att += s_dummyx_loc_att[offset]*hp3;
+          tempy3l_att += s_dummyy_loc_att[offset]*hp3;
+          tempz3l_att += s_dummyz_loc_att[offset]*hp3;
+
+	}
+      }
 #else
 
       tempx1l = s_dummyx_loc[K*NGLL2+J*NGLLX]*d_hprime_xx[I]
@@ -523,6 +586,64 @@
               + s_dummyz_loc[3*NGLL2+J*NGLLX+I]*d_hprime_xx[3*NGLLX+K]
               + s_dummyz_loc[4*NGLL2+J*NGLLX+I]*d_hprime_xx[4*NGLLX+K];
 
+      if( ATTENUATION ){
+	// temporary variables used for fixing attenuation in a consistent way
+
+	tempx1l_att = s_dummyx_loc_att[K*NGLL2+J*NGLLX]*d_hprime_xx[I]
+	  + s_dummyx_loc_att[K*NGLL2+J*NGLLX+1]*d_hprime_xx[NGLLX+I]
+	  + s_dummyx_loc_att[K*NGLL2+J*NGLLX+2]*d_hprime_xx[2*NGLLX+I]
+	  + s_dummyx_loc_att[K*NGLL2+J*NGLLX+3]*d_hprime_xx[3*NGLLX+I]
+	  + s_dummyx_loc_att[K*NGLL2+J*NGLLX+4]*d_hprime_xx[4*NGLLX+I];
+
+	tempy1l_att = s_dummyy_loc_att[K*NGLL2+J*NGLLX]*d_hprime_xx[I]
+	  + s_dummyy_loc_att[K*NGLL2+J*NGLLX+1]*d_hprime_xx[NGLLX+I]
+	  + s_dummyy_loc_att[K*NGLL2+J*NGLLX+2]*d_hprime_xx[2*NGLLX+I]
+	  + s_dummyy_loc_att[K*NGLL2+J*NGLLX+3]*d_hprime_xx[3*NGLLX+I]
+	  + s_dummyy_loc_att[K*NGLL2+J*NGLLX+4]*d_hprime_xx[4*NGLLX+I];
+
+	tempz1l_att = s_dummyz_loc_att[K*NGLL2+J*NGLLX]*d_hprime_xx[I]
+	  + s_dummyz_loc_att[K*NGLL2+J*NGLLX+1]*d_hprime_xx[NGLLX+I]
+	  + s_dummyz_loc_att[K*NGLL2+J*NGLLX+2]*d_hprime_xx[2*NGLLX+I]
+	  + s_dummyz_loc_att[K*NGLL2+J*NGLLX+3]*d_hprime_xx[3*NGLLX+I]
+	  + s_dummyz_loc_att[K*NGLL2+J*NGLLX+4]*d_hprime_xx[4*NGLLX+I];
+
+	tempx2l_att = s_dummyx_loc_att[K*NGLL2+I]*d_hprime_xx[J]
+	  + s_dummyx_loc_att[K*NGLL2+NGLLX+I]*d_hprime_xx[NGLLX+J]
+	  + s_dummyx_loc_att[K*NGLL2+2*NGLLX+I]*d_hprime_xx[2*NGLLX+J]
+	  + s_dummyx_loc_att[K*NGLL2+3*NGLLX+I]*d_hprime_xx[3*NGLLX+J]
+	  + s_dummyx_loc_att[K*NGLL2+4*NGLLX+I]*d_hprime_xx[4*NGLLX+J];
+
+	tempy2l_att = s_dummyy_loc_att[K*NGLL2+I]*d_hprime_xx[J]
+	  + s_dummyy_loc_att[K*NGLL2+NGLLX+I]*d_hprime_xx[NGLLX+J]
+	  + s_dummyy_loc_att[K*NGLL2+2*NGLLX+I]*d_hprime_xx[2*NGLLX+J]
+	  + s_dummyy_loc_att[K*NGLL2+3*NGLLX+I]*d_hprime_xx[3*NGLLX+J]
+	  + s_dummyy_loc_att[K*NGLL2+4*NGLLX+I]*d_hprime_xx[4*NGLLX+J];
+
+	tempz2l_att = s_dummyz_loc_att[K*NGLL2+I]*d_hprime_xx[J]
+	  + s_dummyz_loc_att[K*NGLL2+NGLLX+I]*d_hprime_xx[NGLLX+J]
+	  + s_dummyz_loc_att[K*NGLL2+2*NGLLX+I]*d_hprime_xx[2*NGLLX+J]
+	  + s_dummyz_loc_att[K*NGLL2+3*NGLLX+I]*d_hprime_xx[3*NGLLX+J]
+	  + s_dummyz_loc_att[K*NGLL2+4*NGLLX+I]*d_hprime_xx[4*NGLLX+J];
+
+	tempx3l_att = s_dummyx_loc_att[J*NGLLX+I]*d_hprime_xx[K]
+	  + s_dummyx_loc_att[NGLL2+J*NGLLX+I]*d_hprime_xx[NGLLX+K]
+	  + s_dummyx_loc_att[2*NGLL2+J*NGLLX+I]*d_hprime_xx[2*NGLLX+K]
+	  + s_dummyx_loc_att[3*NGLL2+J*NGLLX+I]*d_hprime_xx[3*NGLLX+K]
+	  + s_dummyx_loc_att[4*NGLL2+J*NGLLX+I]*d_hprime_xx[4*NGLLX+K];
+
+	tempy3l_att = s_dummyy_loc_att[J*NGLLX+I]*d_hprime_xx[K]
+	  + s_dummyy_loc_att[NGLL2+J*NGLLX+I]*d_hprime_xx[NGLLX+K]
+	  + s_dummyy_loc_att[2*NGLL2+J*NGLLX+I]*d_hprime_xx[2*NGLLX+K]
+	  + s_dummyy_loc_att[3*NGLL2+J*NGLLX+I]*d_hprime_xx[3*NGLLX+K]
+	  + s_dummyy_loc_att[4*NGLL2+J*NGLLX+I]*d_hprime_xx[4*NGLLX+K];
+
+	tempz3l_att = s_dummyz_loc_att[J*NGLLX+I]*d_hprime_xx[K]
+	  + s_dummyz_loc_att[NGLL2+J*NGLLX+I]*d_hprime_xx[NGLLX+K]
+	  + s_dummyz_loc_att[2*NGLL2+J*NGLLX+I]*d_hprime_xx[2*NGLLX+K]
+	  + s_dummyz_loc_att[3*NGLL2+J*NGLLX+I]*d_hprime_xx[3*NGLLX+K]
+	  + s_dummyz_loc_att[4*NGLL2+J*NGLLX+I]*d_hprime_xx[4*NGLLX+K];
+      }
+
 #endif
 
 // compute derivatives of ux, uy and uz with respect to x, y and z
@@ -558,20 +679,57 @@
       duzdxl_plus_duxdzl = duzdxl + duxdzl;
       duzdyl_plus_duydzl = duzdyl + duydzl;
 
-      // computes deviatoric strain attenuation and/or for kernel calculations
-      if(COMPUTE_AND_STORE_STRAIN) {
-        realw templ = 0.33333333333333333333f * (duxdxl + duydyl + duzdzl); // 1./3. = 0.33333
+      if( ATTENUATION ){
+	// temporary variables used for fixing attenuation in a consistent way
 
-        // local storage: stresses at this current time step
-        epsilondev_xx_loc = duxdxl - templ;
-        epsilondev_yy_loc = duydyl - templ;
-        epsilondev_xy_loc = 0.5f * duxdyl_plus_duydxl;
-        epsilondev_xz_loc = 0.5f * duzdxl_plus_duxdzl;
-        epsilondev_yz_loc = 0.5f * duzdyl_plus_duydzl;
+	duxdxl_att = xixl*tempx1l_att + etaxl*tempx2l_att + gammaxl*tempx3l_att;
+	duxdyl_att = xiyl*tempx1l_att + etayl*tempx2l_att + gammayl*tempx3l_att;
+	duxdzl_att = xizl*tempx1l_att + etazl*tempx2l_att + gammazl*tempx3l_att;
 
-        if(SIMULATION_TYPE == 3) {
-          epsilon_trace_over_3[tx + working_element*NGLL3] = templ;
-        }
+	duydxl_att = xixl*tempy1l_att + etaxl*tempy2l_att + gammaxl*tempy3l_att;
+	duydyl_att = xiyl*tempy1l_att + etayl*tempy2l_att + gammayl*tempy3l_att;
+	duydzl_att = xizl*tempy1l_att + etazl*tempy2l_att + gammazl*tempy3l_att;
+
+	duzdxl_att = xixl*tempz1l_att + etaxl*tempz2l_att + gammaxl*tempz3l_att;
+	duzdyl_att = xiyl*tempz1l_att + etayl*tempz2l_att + gammayl*tempz3l_att;
+	duzdzl_att = xizl*tempz1l_att + etazl*tempz2l_att + gammazl*tempz3l_att;
+
+	// precompute some sums to save CPU time
+	duxdyl_plus_duydxl_att = duxdyl_att + duydxl_att;
+	duzdxl_plus_duxdzl_att = duzdxl_att + duxdzl_att;
+	duzdyl_plus_duydzl_att = duzdyl_att + duydzl_att;
+
+	// computes deviatoric strain attenuation and/or for kernel calculations
+	if(COMPUTE_AND_STORE_STRAIN) {
+	  realw templ = 0.33333333333333333333f * (duxdxl_att + duydyl_att + duzdzl_att); // 1./3. = 0.33333
+
+	  // local storage: stresses at this current time step
+	  epsilondev_xx_loc = duxdxl_att - templ;
+	  epsilondev_yy_loc = duydyl_att - templ;
+	  epsilondev_xy_loc = 0.5f * duxdyl_plus_duydxl_att;
+	  epsilondev_xz_loc = 0.5f * duzdxl_plus_duxdzl_att;
+	  epsilondev_yz_loc = 0.5f * duzdyl_plus_duydzl_att;
+
+	  if(SIMULATION_TYPE == 3) {
+	    epsilon_trace_over_3[tx + working_element*NGLL3] = templ;
+	  }
+	}
+      }else{
+	// computes deviatoric strain attenuation and/or for kernel calculations
+	if(COMPUTE_AND_STORE_STRAIN) {
+	  realw templ = 0.33333333333333333333f * (duxdxl + duydyl + duzdzl); // 1./3. = 0.33333
+
+	  // local storage: stresses at this current time step
+	  epsilondev_xx_loc = duxdxl - templ;
+	  epsilondev_yy_loc = duydyl - templ;
+	  epsilondev_xy_loc = 0.5f * duxdyl_plus_duydxl;
+	  epsilondev_xz_loc = 0.5f * duzdxl_plus_duxdzl;
+	  epsilondev_yz_loc = 0.5f * duzdyl_plus_duydzl;
+
+	  if(SIMULATION_TYPE == 3) {
+	    epsilon_trace_over_3[tx + working_element*NGLL3] = templ;
+	  }
+	}
       }
 
       // compute elements with an elastic isotropic rheology
@@ -877,6 +1035,7 @@
 /* ----------------------------------------------------------------------------------------------- */
 
 void Kernel_2_inner_core(int nb_blocks_to_compute,Mesh* mp,
+			realw d_deltat, 
                         int d_iphase,
                         int* d_ibool,
                         int* d_idoubling,
@@ -946,8 +1105,10 @@
                                   mp->d_phase_ispec_inner_inner_core,
                                   mp->num_phase_ispec_inner_core,
                                   d_iphase,
+				  d_deltat,	     
                                   mp->use_mesh_coloring_gpu,
                                   mp->d_displ_inner_core,
+				  mp->d_veloc_inner_core,
                                   mp->d_accel_inner_core,
                                   d_xix, d_xiy, d_xiz,
                                   d_etax, d_etay, d_etaz,
@@ -989,8 +1150,10 @@
                                      mp->d_phase_ispec_inner_inner_core,
                                      mp->num_phase_ispec_inner_core,
                                      d_iphase,
+				     d_deltat,
                                      mp->use_mesh_coloring_gpu,
                                      mp->d_b_displ_inner_core,
+                                     mp->d_b_veloc_inner_core,
                                      mp->d_b_accel_inner_core,
                                      d_xix, d_xiy, d_xiz,
                                      d_etax, d_etay, d_etaz,
@@ -1044,7 +1207,8 @@
 extern "C"
 void FC_FUNC_(compute_forces_inner_core_cuda,
               COMPUTE_FORCES_INNER_CORE_CUDA)(long* Mesh_pointer_f,
-                                                int* iphase) {
+					      realw* deltat,
+					      int* iphase) {
 
   TRACE("compute_forces_inner_core_cuda");
 
@@ -1111,6 +1275,7 @@
       //}
 
       Kernel_2_inner_core(nb_blocks_to_compute,mp,
+			  *deltat,
                           *iphase,
                           mp->d_ibool_inner_core + color_offset_nonpadded,
                           mp->d_idoubling_inner_core + color_offset_ispec,
@@ -1171,6 +1336,7 @@
     // no mesh coloring: uses atomic updates
 
     Kernel_2_inner_core(num_elements,mp,
+			*deltat,
                         *iphase,
                         mp->d_ibool_inner_core,
                         mp->d_idoubling_inner_core,

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_element.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_element.F90	2012-04-17 18:47:43 UTC (rev 19953)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_element.F90	2012-04-19 11:40:25 UTC (rev 19954)
@@ -383,6 +383,7 @@
 
   implicit none
 
+
   include "constants.h"
 
   ! include values created by the mesher

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle.f90	2012-04-17 18:47:43 UTC (rev 19953)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle.f90	2012-04-19 11:40:25 UTC (rev 19954)
@@ -26,7 +26,10 @@
 !=====================================================================
 
   subroutine compute_forces_crust_mantle(NSPEC,NGLOB,NSPEC_ATT, &
-                                        displ_crust_mantle,accel_crust_mantle, &
+                                        deltat, &
+                                        displ_crust_mantle, &
+                                        veloc_crust_mantle, &
+                                        accel_crust_mantle, &
                                         phase_is_inner, &
                                         R_xx,R_yy,R_xy,R_xz,R_yz, &
                                         epsilondev_xx,epsilondev_yy,epsilondev_xy, &
@@ -68,9 +71,13 @@
 
   integer :: NSPEC,NGLOB,NSPEC_ATT
 
-  ! displacement and acceleration
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: displ_crust_mantle,accel_crust_mantle
+  ! time step
+  real(kind=CUSTOM_REAL) deltat
 
+  ! displacement, velocity and acceleration
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: displ_crust_mantle
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: veloc_crust_mantle
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: accel_crust_mantle
 
   ! memory variables for attenuation
   ! memory variables R_ij are stored at the local rather than global level
@@ -142,6 +149,14 @@
   real(kind=CUSTOM_REAL) tempy1l,tempy2l,tempy3l
   real(kind=CUSTOM_REAL) tempz1l,tempz2l,tempz3l
 
+  real(kind=CUSTOM_REAL) tempx1l_att,tempx2l_att,tempx3l_att
+  real(kind=CUSTOM_REAL) tempy1l_att,tempy2l_att,tempy3l_att
+  real(kind=CUSTOM_REAL) tempz1l_att,tempz2l_att,tempz3l_att
+
+  real(kind=CUSTOM_REAL) duxdxl_att,duxdyl_att,duxdzl_att,duydxl_att
+  real(kind=CUSTOM_REAL) duydyl_att,duydzl_att,duzdxl_att,duzdyl_att,duzdzl_att;
+  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl_att,duzdxl_plus_duxdzl_att,duzdyl_plus_duydzl_att;
+
   ! for gravity
   integer int_radius
   real(kind=CUSTOM_REAL) sigma_yx,sigma_zx,sigma_zy
@@ -216,6 +231,57 @@
             tempz3l = tempz3l + displ_crust_mantle(3,iglob)*hp3
           enddo
 
+          if( ATTENUATION_VAL ) then
+             ! temporary variables used for fixing attenuation in a consistent way
+
+             tempx1l_att = 0._CUSTOM_REAL
+             tempx2l_att = 0._CUSTOM_REAL
+             tempx3l_att = 0._CUSTOM_REAL
+
+             tempy1l_att = 0._CUSTOM_REAL
+             tempy2l_att = 0._CUSTOM_REAL
+             tempy3l_att = 0._CUSTOM_REAL
+
+             tempz1l_att = 0._CUSTOM_REAL
+             tempz2l_att = 0._CUSTOM_REAL
+             tempz3l_att = 0._CUSTOM_REAL
+
+             ! use first order Taylor expansion of displacement for local storage of stresses 
+             ! at this current time step, to fix attenuation in a consistent way
+             do l=1,NGLLX
+                hp1 = hprime_xx(i,l)
+                iglob = ibool(l,j,k,ispec)
+                tempx1l_att = tempx1l_att + &
+                     (displ_crust_mantle(1,iglob) + deltat*veloc_crust_mantle(1,iglob))*hp1
+                tempy1l_att = tempy1l_att + &
+                     (displ_crust_mantle(2,iglob) + deltat*veloc_crust_mantle(2,iglob))*hp1
+                tempz1l_att = tempz1l_att + &
+                     (displ_crust_mantle(3,iglob) + deltat*veloc_crust_mantle(3,iglob))*hp1
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLY
+                hp2 = hprime_yy(j,l)
+                iglob = ibool(i,l,k,ispec)
+                tempx2l_att = tempx2l_att + &
+                     (displ_crust_mantle(1,iglob) + deltat*veloc_crust_mantle(1,iglob))*hp2
+                tempy2l_att = tempy2l_att + &
+                     (displ_crust_mantle(2,iglob) + deltat*veloc_crust_mantle(2,iglob))*hp2
+                tempz2l_att = tempz2l_att + &
+                     (displ_crust_mantle(3,iglob) + deltat*veloc_crust_mantle(3,iglob))*hp2
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLZ
+                hp3 = hprime_zz(k,l)
+                iglob = ibool(i,j,l,ispec)
+                tempx3l_att = tempx3l_att + &
+                     (displ_crust_mantle(1,iglob) + deltat*veloc_crust_mantle(1,iglob))*hp3
+                tempy3l_att = tempy3l_att + &
+                     (displ_crust_mantle(2,iglob) + deltat*veloc_crust_mantle(2,iglob))*hp3
+                tempz3l_att = tempz3l_att + &
+                     (displ_crust_mantle(3,iglob) + deltat*veloc_crust_mantle(3,iglob))*hp3
+             enddo
+          endif
+
 !         get derivatives of ux, uy and uz with respect to x, y and z
 
           xixl = xix(i,j,k,ispec)
@@ -253,19 +319,54 @@
           duzdxl_plus_duxdzl = duzdxl + duxdzl
           duzdyl_plus_duydzl = duzdyl + duydzl
 
-! compute deviatoric strain
-          if (COMPUTE_AND_STORE_STRAIN) then
-            if(NSPEC_CRUST_MANTLE_STRAIN_ONLY == 1) then
-              ispec_strain = 1
-            else
-              ispec_strain = ispec
-            endif
-            epsilon_trace_over_3(i,j,k,ispec_strain) = ONE_THIRD * (duxdxl + duydyl + duzdzl)
-            epsilondev_loc(1,i,j,k) = duxdxl - epsilon_trace_over_3(i,j,k,ispec_strain)
-            epsilondev_loc(2,i,j,k) = duydyl - epsilon_trace_over_3(i,j,k,ispec_strain)
-            epsilondev_loc(3,i,j,k) = 0.5 * duxdyl_plus_duydxl
-            epsilondev_loc(4,i,j,k) = 0.5 * duzdxl_plus_duxdzl
-            epsilondev_loc(5,i,j,k) = 0.5 * duzdyl_plus_duydzl
+          if( ATTENUATION_VAL ) then
+             ! temporary variables used for fixing attenuation in a consistent way
+             duxdxl_att = xixl*tempx1l_att + etaxl*tempx2l_att + gammaxl*tempx3l_att
+             duxdyl_att = xiyl*tempx1l_att + etayl*tempx2l_att + gammayl*tempx3l_att
+             duxdzl_att = xizl*tempx1l_att + etazl*tempx2l_att + gammazl*tempx3l_att
+
+             duydxl_att = xixl*tempy1l_att + etaxl*tempy2l_att + gammaxl*tempy3l_att
+             duydyl_att = xiyl*tempy1l_att + etayl*tempy2l_att + gammayl*tempy3l_att
+             duydzl_att = xizl*tempy1l_att + etazl*tempy2l_att + gammazl*tempy3l_att
+
+             duzdxl_att = xixl*tempz1l_att + etaxl*tempz2l_att + gammaxl*tempz3l_att
+             duzdyl_att = xiyl*tempz1l_att + etayl*tempz2l_att + gammayl*tempz3l_att
+             duzdzl_att = xizl*tempz1l_att + etazl*tempz2l_att + gammazl*tempz3l_att
+
+             ! precompute some sums to save CPU time
+             duxdyl_plus_duydxl_att = duxdyl_att + duydxl_att
+             duzdxl_plus_duxdzl_att = duzdxl_att + duxdzl_att
+             duzdyl_plus_duydzl_att = duzdyl_att + duydzl_att
+
+             ! compute deviatoric strain
+             if (COMPUTE_AND_STORE_STRAIN) then
+                if(NSPEC_CRUST_MANTLE_STRAIN_ONLY == 1) then
+                   ispec_strain = 1
+                else
+                   ispec_strain = ispec
+                endif
+                epsilon_trace_over_3(i,j,k,ispec_strain) = ONE_THIRD * (duxdxl_att + duydyl_att + duzdzl_att)
+                epsilondev_loc(1,i,j,k) = duxdxl_att - epsilon_trace_over_3(i,j,k,ispec_strain)
+                epsilondev_loc(2,i,j,k) = duydyl_att - epsilon_trace_over_3(i,j,k,ispec_strain)
+                epsilondev_loc(3,i,j,k) = 0.5 * duxdyl_plus_duydxl_att
+                epsilondev_loc(4,i,j,k) = 0.5 * duzdxl_plus_duxdzl_att
+                epsilondev_loc(5,i,j,k) = 0.5 * duzdyl_plus_duydzl_att
+             endif
+          else 
+             ! compute deviatoric strain
+             if (COMPUTE_AND_STORE_STRAIN) then
+                if(NSPEC_CRUST_MANTLE_STRAIN_ONLY == 1) then
+                   ispec_strain = 1
+                else
+                   ispec_strain = ispec
+                endif
+                epsilon_trace_over_3(i,j,k,ispec_strain) = ONE_THIRD * (duxdxl + duydyl + duzdzl)
+                epsilondev_loc(1,i,j,k) = duxdxl - epsilon_trace_over_3(i,j,k,ispec_strain)
+                epsilondev_loc(2,i,j,k) = duydyl - epsilon_trace_over_3(i,j,k,ispec_strain)
+                epsilondev_loc(3,i,j,k) = 0.5 * duxdyl_plus_duydxl
+                epsilondev_loc(4,i,j,k) = 0.5 * duzdxl_plus_duxdzl
+                epsilondev_loc(5,i,j,k) = 0.5 * duzdyl_plus_duydzl
+             endif
           endif
 
           ! precompute terms for attenuation if needed

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle_Dev.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle_Dev.F90	2012-04-17 18:47:43 UTC (rev 19953)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle_Dev.F90	2012-04-19 11:40:25 UTC (rev 19954)
@@ -35,7 +35,10 @@
 !          the original routines are commented with "! way 1", the hand-optimized routines with  "! way 2"
 
   subroutine compute_forces_crust_mantle_Dev( NSPEC,NGLOB,NSPEC_ATT, &
-                                              displ_crust_mantle,accel_crust_mantle, &
+                                              deltat, &
+                                              displ_crust_mantle, &
+                                              veloc_crust_mantle, &
+                                              accel_crust_mantle, &
                                               phase_is_inner, &
                                               R_xx,R_yy,R_xy,R_xz,R_yz, &
                                               epsilondev_xx,epsilondev_yy,epsilondev_xy, &
@@ -78,9 +81,14 @@
 
   integer :: NSPEC,NGLOB,NSPEC_ATT
 
-  ! displacement and acceleration
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: displ_crust_mantle,accel_crust_mantle
+  ! time step
+  real(kind=CUSTOM_REAL) deltat
 
+  ! displacement, velocity and acceleration
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: displ_crust_mantle
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: veloc_crust_mantle
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: accel_crust_mantle
+
   ! attenuation
   ! memory variables for attenuation
   ! memory variables R_ij are stored at the local rather than global level
@@ -126,6 +134,19 @@
   equivalence(newtempy1,E2_m1_m2_5points)
   equivalence(newtempz1,E3_m1_m2_5points)
 
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
+    tempx1_att,tempx2_att,tempx3_att,tempy1_att,tempy2_att,tempy3_att,tempz1_att,tempz2_att,tempz3_att
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc_att,dummyy_loc_att,dummyz_loc_att
+  real(kind=CUSTOM_REAL), dimension(NGLLX,m2) :: B1_m1_m2_5points_att,B2_m1_m2_5points_att,B3_m1_m2_5points_att
+  real(kind=CUSTOM_REAL), dimension(m1,m2) :: C1_m1_m2_5points_att,C2_m1_m2_5points_att,C3_m1_m2_5points_att
+
+  equivalence(dummyx_loc_att,B1_m1_m2_5points_att)
+  equivalence(dummyy_loc_att,B2_m1_m2_5points_att)
+  equivalence(dummyz_loc_att,B3_m1_m2_5points_att)
+  equivalence(tempx1_att,C1_m1_m2_5points_att)
+  equivalence(tempy1_att,C2_m1_m2_5points_att)
+  equivalence(tempz1_att,C3_m1_m2_5points_att)
+
   real(kind=CUSTOM_REAL), dimension(m2,NGLLX) :: &
     A1_mxm_m2_m1_5points,A2_mxm_m2_m1_5points,A3_mxm_m2_m1_5points
   real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
@@ -143,6 +164,18 @@
   equivalence(newtempy3,E2_mxm_m2_m1_5points)
   equivalence(newtempz3,E3_mxm_m2_m1_5points)
 
+  real(kind=CUSTOM_REAL), dimension(m2,NGLLX) :: &
+    A1_mxm_m2_m1_5points_att,A2_mxm_m2_m1_5points_att,A3_mxm_m2_m1_5points_att
+  real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
+    C1_mxm_m2_m1_5points_att,C2_mxm_m2_m1_5points_att,C3_mxm_m2_m1_5points_att
+
+  equivalence(dummyx_loc_att,A1_mxm_m2_m1_5points_att)
+  equivalence(dummyy_loc_att,A2_mxm_m2_m1_5points_att)
+  equivalence(dummyz_loc_att,A3_mxm_m2_m1_5points_att)
+  equivalence(tempx3_att,C1_mxm_m2_m1_5points_att)
+  equivalence(tempy3_att,C2_mxm_m2_m1_5points_att)
+  equivalence(tempz3_att,C3_mxm_m2_m1_5points_att)
+
   real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sum_terms
 
   real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
@@ -214,128 +247,313 @@
 ! way 1:
         do i=1,NGLLX
             iglob1 = ibool(i,j,k,ispec)
-            dummyx_loc(i,j,k) = displ_crust_mantle(1,iglob1)
-            dummyy_loc(i,j,k) = displ_crust_mantle(2,iglob1)
-            dummyz_loc(i,j,k) = displ_crust_mantle(3,iglob1)
+            dummyx_loc(i,j,k) = displ_crust_mantle(1,iglob1) 
+            dummyy_loc(i,j,k) = displ_crust_mantle(2,iglob1) 
+            dummyz_loc(i,j,k) = displ_crust_mantle(3,iglob1) 
         enddo
+
 #endif
       enddo
     enddo
+
+    if( ATTENUATION_VAL ) then
+       ! use first order Taylor expansion of displacement for local storage of stresses 
+       ! at this current time step, to fix attenuation in a consistent way
+
+       do k=1,NGLLZ
+          do j=1,NGLLY
+
+#ifdef _HANDOPT
+             ! way 2:
+             ! since we know that NGLLX = 5, this should help pipelining
+             iglobv5(:) = ibool(:,j,k,ispec)
+
+             dummyx_loc_att(1,j,k) = displ_crust_mantle(1,iglobv5(1)) + deltat*veloc_crust_mantle(1,iglobv5(1))
+             dummyy_loc_att(1,j,k) = displ_crust_mantle(2,iglobv5(1)) + deltat*veloc_crust_mantle(2,iglobv5(1))
+             dummyz_loc_att(1,j,k) = displ_crust_mantle(3,iglobv5(1)) + deltat*veloc_crust_mantle(3,iglobv5(1))
+
+             dummyx_loc_att(2,j,k) = displ_crust_mantle(1,iglobv5(2)) + deltat*veloc_crust_mantle(1,iglobv5(2))
+             dummyy_loc_att(2,j,k) = displ_crust_mantle(2,iglobv5(2)) + deltat*veloc_crust_mantle(2,iglobv5(2))
+             dummyz_loc_att(2,j,k) = displ_crust_mantle(3,iglobv5(2)) + deltat*veloc_crust_mantle(3,iglobv5(2))
+
+             dummyx_loc_att(3,j,k) = displ_crust_mantle(1,iglobv5(3)) + deltat*veloc_crust_mantle(1,iglobv5(3))
+             dummyy_loc_att(3,j,k) = displ_crust_mantle(2,iglobv5(3)) + deltat*veloc_crust_mantle(2,iglobv5(3))
+             dummyz_loc_att(3,j,k) = displ_crust_mantle(3,iglobv5(3)) + deltat*veloc_crust_mantle(3,iglobv5(3))
+
+             dummyx_loc_att(4,j,k) = displ_crust_mantle(1,iglobv5(4)) + deltat*veloc_crust_mantle(1,iglobv5(4))
+             dummyy_loc_att(4,j,k) = displ_crust_mantle(2,iglobv5(4)) + deltat*veloc_crust_mantle(2,iglobv5(4))
+             dummyz_loc_att(4,j,k) = displ_crust_mantle(3,iglobv5(4)) + deltat*veloc_crust_mantle(3,iglobv5(4))
+
+             dummyx_loc_att(5,j,k) = displ_crust_mantle(1,iglobv5(5)) + deltat*veloc_crust_mantle(1,iglobv5(5))
+             dummyy_loc_att(5,j,k) = displ_crust_mantle(2,iglobv5(5)) + deltat*veloc_crust_mantle(2,iglobv5(5))
+             dummyz_loc_att(5,j,k) = displ_crust_mantle(3,iglobv5(5)) + deltat*veloc_crust_mantle(3,iglobv5(5))
+
+#else
+             ! way 1:
+             do i=1,NGLLX
+                iglob1 = ibool(i,j,k,ispec)
+                dummyx_loc_att(i,j,k) = displ_crust_mantle(1,iglob1) + deltat*veloc_crust_mantle(1,iglob1)
+                dummyy_loc_att(i,j,k) = displ_crust_mantle(2,iglob1) + deltat*veloc_crust_mantle(2,iglob1)
+                dummyz_loc_att(i,j,k) = displ_crust_mantle(3,iglob1) + deltat*veloc_crust_mantle(3,iglob1)
+             enddo
+
+#endif
+          enddo
+       enddo
+    endif
+
     do j=1,m2
-      do i=1,m1
-        C1_m1_m2_5points(i,j) = hprime_xx(i,1)*B1_m1_m2_5points(1,j) + &
-                              hprime_xx(i,2)*B1_m1_m2_5points(2,j) + &
-                              hprime_xx(i,3)*B1_m1_m2_5points(3,j) + &
-                              hprime_xx(i,4)*B1_m1_m2_5points(4,j) + &
-                              hprime_xx(i,5)*B1_m1_m2_5points(5,j)
+       do i=1,m1
+          C1_m1_m2_5points(i,j) = hprime_xx(i,1)*B1_m1_m2_5points(1,j) + &
+               hprime_xx(i,2)*B1_m1_m2_5points(2,j) + &
+               hprime_xx(i,3)*B1_m1_m2_5points(3,j) + &
+               hprime_xx(i,4)*B1_m1_m2_5points(4,j) + &
+               hprime_xx(i,5)*B1_m1_m2_5points(5,j)
 
-        C2_m1_m2_5points(i,j) = hprime_xx(i,1)*B2_m1_m2_5points(1,j) + &
-                              hprime_xx(i,2)*B2_m1_m2_5points(2,j) + &
-                              hprime_xx(i,3)*B2_m1_m2_5points(3,j) + &
-                              hprime_xx(i,4)*B2_m1_m2_5points(4,j) + &
-                              hprime_xx(i,5)*B2_m1_m2_5points(5,j)
+          C2_m1_m2_5points(i,j) = hprime_xx(i,1)*B2_m1_m2_5points(1,j) + &
+               hprime_xx(i,2)*B2_m1_m2_5points(2,j) + &
+               hprime_xx(i,3)*B2_m1_m2_5points(3,j) + &
+               hprime_xx(i,4)*B2_m1_m2_5points(4,j) + &
+               hprime_xx(i,5)*B2_m1_m2_5points(5,j)
 
-        C3_m1_m2_5points(i,j) = hprime_xx(i,1)*B3_m1_m2_5points(1,j) + &
-                              hprime_xx(i,2)*B3_m1_m2_5points(2,j) + &
-                              hprime_xx(i,3)*B3_m1_m2_5points(3,j) + &
-                              hprime_xx(i,4)*B3_m1_m2_5points(4,j) + &
-                              hprime_xx(i,5)*B3_m1_m2_5points(5,j)
-      enddo
+          C3_m1_m2_5points(i,j) = hprime_xx(i,1)*B3_m1_m2_5points(1,j) + &
+               hprime_xx(i,2)*B3_m1_m2_5points(2,j) + &
+               hprime_xx(i,3)*B3_m1_m2_5points(3,j) + &
+               hprime_xx(i,4)*B3_m1_m2_5points(4,j) + &
+               hprime_xx(i,5)*B3_m1_m2_5points(5,j)
+       enddo
     enddo
+
+    if( ATTENUATION_VAL ) then
+       ! temporary variables used for fixing attenuation in a consistent way
+       do j=1,m2
+          do i=1,m1
+             C1_m1_m2_5points_att(i,j) = hprime_xx(i,1)*B1_m1_m2_5points_att(1,j) + &
+                  hprime_xx(i,2)*B1_m1_m2_5points_att(2,j) + &
+                  hprime_xx(i,3)*B1_m1_m2_5points_att(3,j) + &
+                  hprime_xx(i,4)*B1_m1_m2_5points_att(4,j) + &
+                  hprime_xx(i,5)*B1_m1_m2_5points_att(5,j)
+
+             C2_m1_m2_5points_att(i,j) = hprime_xx(i,1)*B2_m1_m2_5points_att(1,j) + &
+                  hprime_xx(i,2)*B2_m1_m2_5points_att(2,j) + &
+                  hprime_xx(i,3)*B2_m1_m2_5points_att(3,j) + &
+                  hprime_xx(i,4)*B2_m1_m2_5points_att(4,j) + &
+                  hprime_xx(i,5)*B2_m1_m2_5points_att(5,j)
+
+             C3_m1_m2_5points_att(i,j) = hprime_xx(i,1)*B3_m1_m2_5points_att(1,j) + &
+                  hprime_xx(i,2)*B3_m1_m2_5points_att(2,j) + &
+                  hprime_xx(i,3)*B3_m1_m2_5points_att(3,j) + &
+                  hprime_xx(i,4)*B3_m1_m2_5points_att(4,j) + &
+                  hprime_xx(i,5)*B3_m1_m2_5points_att(5,j)
+          enddo
+       enddo
+    endif
+
     do j=1,m1
-      do i=1,m1
-        ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
-        do k = 1,NGLLX
-          tempx2(i,j,k) = dummyx_loc(i,1,k)*hprime_xxT(1,j) + &
-                        dummyx_loc(i,2,k)*hprime_xxT(2,j) + &
-                        dummyx_loc(i,3,k)*hprime_xxT(3,j) + &
-                        dummyx_loc(i,4,k)*hprime_xxT(4,j) + &
-                        dummyx_loc(i,5,k)*hprime_xxT(5,j)
+       do i=1,m1
+          ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
+          do k = 1,NGLLX
+             tempx2(i,j,k) = dummyx_loc(i,1,k)*hprime_xxT(1,j) + &
+                  dummyx_loc(i,2,k)*hprime_xxT(2,j) + &
+                  dummyx_loc(i,3,k)*hprime_xxT(3,j) + &
+                  dummyx_loc(i,4,k)*hprime_xxT(4,j) + &
+                  dummyx_loc(i,5,k)*hprime_xxT(5,j)
 
-          tempy2(i,j,k) = dummyy_loc(i,1,k)*hprime_xxT(1,j) + &
-                        dummyy_loc(i,2,k)*hprime_xxT(2,j) + &
-                        dummyy_loc(i,3,k)*hprime_xxT(3,j) + &
-                        dummyy_loc(i,4,k)*hprime_xxT(4,j) + &
-                        dummyy_loc(i,5,k)*hprime_xxT(5,j)
+             tempy2(i,j,k) = dummyy_loc(i,1,k)*hprime_xxT(1,j) + &
+                  dummyy_loc(i,2,k)*hprime_xxT(2,j) + &
+                  dummyy_loc(i,3,k)*hprime_xxT(3,j) + &
+                  dummyy_loc(i,4,k)*hprime_xxT(4,j) + &
+                  dummyy_loc(i,5,k)*hprime_xxT(5,j)
 
-          tempz2(i,j,k) = dummyz_loc(i,1,k)*hprime_xxT(1,j) + &
-                        dummyz_loc(i,2,k)*hprime_xxT(2,j) + &
-                        dummyz_loc(i,3,k)*hprime_xxT(3,j) + &
-                        dummyz_loc(i,4,k)*hprime_xxT(4,j) + &
-                        dummyz_loc(i,5,k)*hprime_xxT(5,j)
-        enddo
-      enddo
+             tempz2(i,j,k) = dummyz_loc(i,1,k)*hprime_xxT(1,j) + &
+                  dummyz_loc(i,2,k)*hprime_xxT(2,j) + &
+                  dummyz_loc(i,3,k)*hprime_xxT(3,j) + &
+                  dummyz_loc(i,4,k)*hprime_xxT(4,j) + &
+                  dummyz_loc(i,5,k)*hprime_xxT(5,j)
+          enddo
+       enddo
     enddo
+
+    if( ATTENUATION_VAL ) then
+       ! temporary variables used for fixing attenuation in a consistent way
+       do j=1,m1
+          do i=1,m1
+             ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
+             do k = 1,NGLLX
+                tempx2_att(i,j,k) = dummyx_loc_att(i,1,k)*hprime_xxT(1,j) + &
+                     dummyx_loc_att(i,2,k)*hprime_xxT(2,j) + &
+                     dummyx_loc_att(i,3,k)*hprime_xxT(3,j) + &
+                     dummyx_loc_att(i,4,k)*hprime_xxT(4,j) + &
+                     dummyx_loc_att(i,5,k)*hprime_xxT(5,j)
+
+                tempy2_att(i,j,k) = dummyy_loc_att(i,1,k)*hprime_xxT(1,j) + &
+                     dummyy_loc_att(i,2,k)*hprime_xxT(2,j) + &
+                     dummyy_loc_att(i,3,k)*hprime_xxT(3,j) + &
+                     dummyy_loc_att(i,4,k)*hprime_xxT(4,j) + &
+                     dummyy_loc_att(i,5,k)*hprime_xxT(5,j)
+
+                tempz2_att(i,j,k) = dummyz_loc_att(i,1,k)*hprime_xxT(1,j) + &
+                     dummyz_loc_att(i,2,k)*hprime_xxT(2,j) + &
+                     dummyz_loc_att(i,3,k)*hprime_xxT(3,j) + &
+                     dummyz_loc_att(i,4,k)*hprime_xxT(4,j) + &
+                     dummyz_loc_att(i,5,k)*hprime_xxT(5,j)
+             enddo
+          enddo
+       enddo
+    endif
+
     do j=1,m1
-      do i=1,m2
-        C1_mxm_m2_m1_5points(i,j) = A1_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
-                                  A1_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
-                                  A1_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
-                                  A1_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
-                                  A1_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
+       do i=1,m2
+          C1_mxm_m2_m1_5points(i,j) = A1_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
+               A1_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
+               A1_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
+               A1_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
+               A1_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
 
-        C2_mxm_m2_m1_5points(i,j) = A2_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
-                                  A2_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
-                                  A2_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
-                                  A2_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
-                                  A2_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
+          C2_mxm_m2_m1_5points(i,j) = A2_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
+               A2_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
+               A2_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
+               A2_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
+               A2_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
 
-        C3_mxm_m2_m1_5points(i,j) = A3_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
-                                  A3_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
-                                  A3_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
-                                  A3_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
-                                  A3_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
-      enddo
+          C3_mxm_m2_m1_5points(i,j) = A3_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
+               A3_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
+               A3_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
+               A3_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
+               A3_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
+       enddo
     enddo
 
+    if( ATTENUATION_VAL ) then
+       ! temporary variables used for fixing attenuation in a consistent way
+       do j=1,m1
+          do i=1,m2
+             C1_mxm_m2_m1_5points_att(i,j) = A1_mxm_m2_m1_5points_att(i,1)*hprime_xxT(1,j) + &
+                  A1_mxm_m2_m1_5points_att(i,2)*hprime_xxT(2,j) + &
+                  A1_mxm_m2_m1_5points_att(i,3)*hprime_xxT(3,j) + &
+                  A1_mxm_m2_m1_5points_att(i,4)*hprime_xxT(4,j) + &
+                  A1_mxm_m2_m1_5points_att(i,5)*hprime_xxT(5,j)
+
+             C2_mxm_m2_m1_5points_att(i,j) = A2_mxm_m2_m1_5points_att(i,1)*hprime_xxT(1,j) + &
+                  A2_mxm_m2_m1_5points_att(i,2)*hprime_xxT(2,j) + &
+                  A2_mxm_m2_m1_5points_att(i,3)*hprime_xxT(3,j) + &
+                  A2_mxm_m2_m1_5points_att(i,4)*hprime_xxT(4,j) + &
+                  A2_mxm_m2_m1_5points_att(i,5)*hprime_xxT(5,j)
+
+             C3_mxm_m2_m1_5points_att(i,j) = A3_mxm_m2_m1_5points_att(i,1)*hprime_xxT(1,j) + &
+                  A3_mxm_m2_m1_5points_att(i,2)*hprime_xxT(2,j) + &
+                  A3_mxm_m2_m1_5points_att(i,3)*hprime_xxT(3,j) + &
+                  A3_mxm_m2_m1_5points_att(i,4)*hprime_xxT(4,j) + &
+                  A3_mxm_m2_m1_5points_att(i,5)*hprime_xxT(5,j)
+          enddo
+       enddo
+    endif
+
     !
     ! compute either isotropic, transverse isotropic or anisotropic elements
     !
     if(ANISOTROPIC_3D_MANTLE_VAL) then
       ! anisotropic element
-      call compute_element_aniso(ispec, &
-                    minus_gravity_table,density_table,minus_deriv_gravity_table, &
-                    xstore,ystore,zstore, &
-                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                    wgll_cube, &
-                    c11store,c12store,c13store,c14store,c15store,c16store,c22store, &
-                    c23store,c24store,c25store,c26store,c33store,c34store,c35store, &
-                    c36store,c44store,c45store,c46store,c55store,c56store,c66store, &
-                    ibool, &
-                    R_xx,R_yy,R_xy,R_xz,R_yz, &
-                    epsilon_trace_over_3, &
-                    one_minus_sum_beta,vx,vy,vz,vnspec, &
-                    tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
-                    dummyx_loc,dummyy_loc,dummyz_loc,epsilondev_loc,rho_s_H)
+
+       if( ATTENUATION_VAL ) then
+          call compute_element_aniso(ispec, &
+               minus_gravity_table,density_table,minus_deriv_gravity_table, &
+               xstore,ystore,zstore, &
+               xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+               wgll_cube, &
+               c11store,c12store,c13store,c14store,c15store,c16store,c22store, &
+               c23store,c24store,c25store,c26store,c33store,c34store,c35store, &
+               c36store,c44store,c45store,c46store,c55store,c56store,c66store, &
+               ibool, &
+               R_xx,R_yy,R_xy,R_xz,R_yz, &
+               epsilon_trace_over_3, &
+               one_minus_sum_beta,vx,vy,vz,vnspec, &
+               tempx1_att,tempx2_att,tempx3_att, &
+               tempy1_att,tempy2_att,tempy3_att, &
+               tempz1_att,tempz2_att,tempz3_att, &
+               dummyx_loc,dummyy_loc,dummyz_loc,epsilondev_loc,rho_s_H)
+       else
+          call compute_element_aniso(ispec, &
+               minus_gravity_table,density_table,minus_deriv_gravity_table, &
+               xstore,ystore,zstore, &
+               xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+               wgll_cube, &
+               c11store,c12store,c13store,c14store,c15store,c16store,c22store, &
+               c23store,c24store,c25store,c26store,c33store,c34store,c35store, &
+               c36store,c44store,c45store,c46store,c55store,c56store,c66store, &
+               ibool, &
+               R_xx,R_yy,R_xy,R_xz,R_yz, &
+               epsilon_trace_over_3, &
+               one_minus_sum_beta,vx,vy,vz,vnspec, &
+               tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
+               dummyx_loc,dummyy_loc,dummyz_loc,epsilondev_loc,rho_s_H)
+       endif
     else
       if( .not. ispec_is_tiso(ispec) ) then
         ! isotropic element
-        call compute_element_iso(ispec, &
-                    minus_gravity_table,density_table,minus_deriv_gravity_table, &
-                    xstore,ystore,zstore, &
-                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                    wgll_cube, &
-                    kappavstore,muvstore, &
-                    ibool, &
-                    R_xx,R_yy,R_xy,R_xz,R_yz, &
-                    epsilon_trace_over_3, &
-                    one_minus_sum_beta,vx,vy,vz,vnspec, &
-                    tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
-                    dummyx_loc,dummyy_loc,dummyz_loc,epsilondev_loc,rho_s_H)
+
+         if( ATTENUATION_VAL ) then
+            call compute_element_iso(ispec, &
+                 minus_gravity_table,density_table,minus_deriv_gravity_table, &
+                 xstore,ystore,zstore, &
+                 xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                 wgll_cube, &
+                 kappavstore,muvstore, &
+                 ibool, &
+                 R_xx,R_yy,R_xy,R_xz,R_yz, &
+                 epsilon_trace_over_3, &
+                 one_minus_sum_beta,vx,vy,vz,vnspec, &
+                 tempx1_att,tempx2_att,tempx3_att, &
+                 tempy1_att,tempy2_att,tempy3_att, &
+                 tempz1_att,tempz2_att,tempz3_att, &
+                 dummyx_loc,dummyy_loc,dummyz_loc,epsilondev_loc,rho_s_H)
+         else
+            call compute_element_iso(ispec, &
+                 minus_gravity_table,density_table,minus_deriv_gravity_table, &
+                 xstore,ystore,zstore, &
+                 xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                 wgll_cube, &
+                 kappavstore,muvstore, &
+                 ibool, &
+                 R_xx,R_yy,R_xy,R_xz,R_yz, &
+                 epsilon_trace_over_3, &
+                 one_minus_sum_beta,vx,vy,vz,vnspec, &
+                 tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
+                 dummyx_loc,dummyy_loc,dummyz_loc,epsilondev_loc,rho_s_H)
+         endif
+
       else
         ! transverse isotropic element
-        call compute_element_tiso(ispec, &
-                    minus_gravity_table,density_table,minus_deriv_gravity_table, &
-                    xstore,ystore,zstore, &
-                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                    wgll_cube, &
-                    kappavstore,kappahstore,muvstore,muhstore,eta_anisostore, &
-                    ibool, &
-                    R_xx,R_yy,R_xy,R_xz,R_yz, &
-                    epsilon_trace_over_3, &
-                    one_minus_sum_beta,vx,vy,vz,vnspec, &
-                    tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
-                    dummyx_loc,dummyy_loc,dummyz_loc,epsilondev_loc,rho_s_H)
+
+         if( ATTENUATION_VAL ) then
+            call compute_element_tiso(ispec, &
+                 minus_gravity_table,density_table,minus_deriv_gravity_table, &
+                 xstore,ystore,zstore, &
+                 xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                 wgll_cube, &
+                 kappavstore,kappahstore,muvstore,muhstore,eta_anisostore, &
+                 ibool, &
+                 R_xx,R_yy,R_xy,R_xz,R_yz, &
+                 epsilon_trace_over_3, &
+                 one_minus_sum_beta,vx,vy,vz,vnspec, &
+                 tempx1_att,tempx2_att,tempx3_att, &
+                 tempy1_att,tempy2_att,tempy3_att, &
+                 tempz1_att,tempz2_att,tempz3_att, &
+                 dummyx_loc,dummyy_loc,dummyz_loc,epsilondev_loc,rho_s_H)
+         else
+            call compute_element_tiso(ispec, &
+                 minus_gravity_table,density_table,minus_deriv_gravity_table, &
+                 xstore,ystore,zstore, &
+                 xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                 wgll_cube, &
+                 kappavstore,kappahstore,muvstore,muhstore,eta_anisostore, &
+                 ibool, &
+                 R_xx,R_yy,R_xy,R_xz,R_yz, &
+                 epsilon_trace_over_3, &
+                 one_minus_sum_beta,vx,vy,vz,vnspec, &
+                 tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
+                 dummyx_loc,dummyy_loc,dummyz_loc,epsilondev_loc,rho_s_H)
+         endif
       endif ! .not. ispec_is_tiso
     endif
 

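For reference, every *_att array introduced in this commit holds the same forward prediction of the displacement field, displ + deltat*veloc, evaluated before the strain is computed. A minimal, self-contained Fortran sketch of that update (array names, sizes and the time step are illustrative only, not taken from the source tree):

    program taylor_update_sketch
      implicit none
      integer, parameter :: NDIM = 3, NGLOB = 4        ! illustrative sizes only
      real, dimension(NDIM,NGLOB) :: displ, veloc, displ_att
      real :: deltat
      deltat = 0.05                                    ! dummy time step
      call random_number(displ)
      call random_number(veloc)
      ! same first-order Taylor prediction that fills dummy*_loc_att above
      displ_att(:,:) = displ(:,:) + deltat * veloc(:,:)
      print *, 'max predicted displacement: ', maxval(abs(displ_att))
    end program taylor_update_sketch

The CUDA kernel applies the identical expression per component (d_displ[...] + d_deltat * d_veloc[...]), with or without the texture fetch path.
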
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90	2012-04-17 18:47:43 UTC (rev 19953)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90	2012-04-19 11:40:25 UTC (rev 19954)
@@ -78,7 +78,8 @@
         ! crust/mantle region
         call compute_forces_crust_mantle_Dev( NSPEC_CRUST_MANTLE_STR_OR_ATT,NGLOB_CRUST_MANTLE, &
                                       NSPEC_CRUST_MANTLE_ATTENUAT, &
-                                      displ_crust_mantle,accel_crust_mantle, &
+                                      deltat, &
+                                      displ_crust_mantle,veloc_crust_mantle,accel_crust_mantle, &
                                       phase_is_inner, &
                                       R_xx_crust_mantle,R_yy_crust_mantle,R_xy_crust_mantle, &
                                       R_xz_crust_mantle,R_yz_crust_mantle, &
@@ -91,7 +92,8 @@
         ! inner core region
         call compute_forces_inner_core_Dev( NSPEC_INNER_CORE_STR_OR_ATT,NGLOB_INNER_CORE, &
                                       NSPEC_INNER_CORE_ATTENUATION, &
-                                      displ_inner_core,accel_inner_core, &
+                                      deltat, &
+                                      displ_inner_core,veloc_inner_core,accel_inner_core, &
                                       phase_is_inner, &
                                       R_xx_inner_core,R_yy_inner_core,R_xy_inner_core,R_xz_inner_core,R_yz_inner_core, &
                                       epsilondev_xx_inner_core,epsilondev_yy_inner_core,epsilondev_xy_inner_core, &
@@ -107,7 +109,8 @@
         ! crust/mantle region
         call compute_forces_crust_mantle(  NSPEC_CRUST_MANTLE_STR_OR_ATT,NGLOB_CRUST_MANTLE, &
                                       NSPEC_CRUST_MANTLE_ATTENUAT, &
-                                      displ_crust_mantle,accel_crust_mantle, &
+                                      deltat, &
+                                      displ_crust_mantle,veloc_crust_mantle,accel_crust_mantle, &
                                       phase_is_inner, &
                                       R_xx_crust_mantle,R_yy_crust_mantle,R_xy_crust_mantle, &
                                       R_xz_crust_mantle,R_yz_crust_mantle, &
@@ -120,7 +123,8 @@
         ! inner core region
         call compute_forces_inner_core( NSPEC_INNER_CORE_STR_OR_ATT,NGLOB_INNER_CORE, &
                                       NSPEC_INNER_CORE_ATTENUATION, &
-                                      displ_inner_core,accel_inner_core, &
+                                      deltat, &
+                                      displ_inner_core,veloc_inner_core,accel_inner_core, &
                                       phase_is_inner, &
                                       R_xx_inner_core,R_yy_inner_core,R_xy_inner_core,R_xz_inner_core,R_yz_inner_core, &
                                       epsilondev_xx_inner_core,epsilondev_yy_inner_core,epsilondev_xy_inner_core, &
@@ -140,7 +144,8 @@
           ! crust/mantle region
           call compute_forces_crust_mantle_Dev( NSPEC_CRUST_MANTLE_ADJOINT,NGLOB_CRUST_MANTLE_ADJOINT, &
                                       NSPEC_CRUST_MANTLE_STR_AND_ATT, &
-                                      b_displ_crust_mantle,b_accel_crust_mantle, &
+                                      deltat, &
+                                      b_displ_crust_mantle,b_veloc_crust_mantle,b_accel_crust_mantle, &
                                       phase_is_inner, &
                                       b_R_xx_crust_mantle,b_R_yy_crust_mantle,b_R_xy_crust_mantle, &
                                       b_R_xz_crust_mantle,b_R_yz_crust_mantle, &
@@ -154,7 +159,8 @@
           ! inner core region
           call compute_forces_inner_core_Dev( NSPEC_INNER_CORE_ADJOINT,NGLOB_INNER_CORE_ADJOINT, &
                                         NSPEC_INNER_CORE_STR_AND_ATT, &
-                                        b_displ_inner_core,b_accel_inner_core, &
+                                        deltat, &
+                                        b_displ_inner_core,b_veloc_inner_core,b_accel_inner_core, &
                                         phase_is_inner, &
                                         b_R_xx_inner_core,b_R_yy_inner_core,b_R_xy_inner_core, &
                                         b_R_xz_inner_core,b_R_yz_inner_core, &
@@ -171,7 +177,8 @@
           ! crust/mantle region
           call compute_forces_crust_mantle( NSPEC_CRUST_MANTLE_ADJOINT,NGLOB_CRUST_MANTLE_ADJOINT, &
                                         NSPEC_CRUST_MANTLE_STR_AND_ATT, &
-                                        b_displ_crust_mantle,b_accel_crust_mantle, &
+                                        deltat, &
+                                        b_displ_crust_mantle,b_veloc_crust_mantle,b_accel_crust_mantle, &
                                         phase_is_inner, &
                                         b_R_xx_crust_mantle,b_R_yy_crust_mantle,b_R_xy_crust_mantle, &
                                         b_R_xz_crust_mantle,b_R_yz_crust_mantle, &
@@ -186,7 +193,8 @@
           ! inner core region
           call compute_forces_inner_core( NSPEC_INNER_CORE_ADJOINT,NGLOB_INNER_CORE_ADJOINT, &
                                         NSPEC_INNER_CORE_STR_AND_ATT, &
-                                        b_displ_inner_core,b_accel_inner_core, &
+                                        deltat, &
+                                        b_displ_inner_core,b_veloc_inner_core,b_accel_inner_core, &
                                         phase_is_inner, &
                                         b_R_xx_inner_core,b_R_yy_inner_core,b_R_xy_inner_core, &
                                         b_R_xz_inner_core,b_R_yz_inner_core, &
@@ -206,7 +214,7 @@
       ! for crust/mantle
       call compute_forces_crust_mantle_cuda(Mesh_pointer,deltat,iphase)
       ! for inner core
-      call compute_forces_inner_core_cuda(Mesh_pointer,iphase)
+      call compute_forces_inner_core_cuda(Mesh_pointer,deltat,iphase)
     endif ! GPU_MODE
 
 

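In equation form (standard continuum-mechanics notation, not copied verbatim from the code), the extra deltat and veloc arguments let each routine evaluate the deviatoric strain fed to the attenuation memory variables from a displacement predicted one step ahead by a first-order Taylor expansion:

    \tilde{u}_i \;=\; u_i(t) + \Delta t \, v_i(t)
    \tilde{\varepsilon}_{ij} \;=\; \tfrac{1}{2}\bigl(\partial_j \tilde{u}_i + \partial_i \tilde{u}_j\bigr), \qquad
    \tilde{\varepsilon}^{\,\mathrm{dev}}_{ij} \;=\; \tilde{\varepsilon}_{ij} - \tfrac{1}{3}\,\delta_{ij}\,\tilde{\varepsilon}_{kk}

Here epsilon_trace_over_3 corresponds to \tfrac{1}{3}\tilde{\varepsilon}_{kk} and epsilondev_loc(1..5) to the xx, yy, xy, xz and yz deviatoric components used in the hunks above and below.
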
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core.f90	2012-04-17 18:47:43 UTC (rev 19953)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core.f90	2012-04-19 11:40:25 UTC (rev 19954)
@@ -26,7 +26,10 @@
 !=====================================================================
 
   subroutine compute_forces_inner_core( NSPEC,NGLOB,NSPEC_ATT, &
-                                        displ_inner_core,accel_inner_core, &
+                                        deltat, &
+                                        displ_inner_core, &
+                                        veloc_inner_core, &
+                                        accel_inner_core, &
                                         phase_is_inner, &
                                         R_xx,R_yy,R_xy,R_xz,R_yz, &
                                         epsilondev_xx,epsilondev_yy,epsilondev_xy, &
@@ -61,9 +64,14 @@
 
   integer :: NSPEC,NGLOB,NSPEC_ATT
 
-  ! displacement and acceleration
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: displ_inner_core,accel_inner_core
+  ! time step
+  real(kind=CUSTOM_REAL) deltat
 
+  ! displacement, velocity and acceleration
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: displ_inner_core
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: veloc_inner_core
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: accel_inner_core
+
   ! for attenuation
   ! memory variables R_ij are stored at the local rather than global level
   ! to allow for optimization of cache access by compiler
@@ -119,6 +127,14 @@
   real(kind=CUSTOM_REAL) tempy1l,tempy2l,tempy3l
   real(kind=CUSTOM_REAL) tempz1l,tempz2l,tempz3l
 
+  real(kind=CUSTOM_REAL) tempx1l_att,tempx2l_att,tempx3l_att
+  real(kind=CUSTOM_REAL) tempy1l_att,tempy2l_att,tempy3l_att
+  real(kind=CUSTOM_REAL) tempz1l_att,tempz2l_att,tempz3l_att
+
+  real(kind=CUSTOM_REAL) duxdxl_att,duxdyl_att,duxdzl_att,duydxl_att
+  real(kind=CUSTOM_REAL) duydyl_att,duydzl_att,duzdxl_att,duzdyl_att,duzdzl_att
+  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl_att,duzdxl_plus_duxdzl_att,duzdyl_plus_duydzl_att
+
   ! for gravity
   double precision radius,rho,minus_g,minus_dg
   double precision minus_g_over_radius,minus_dg_plus_g_over_radius
@@ -196,6 +212,57 @@
             tempz3l = tempz3l + displ_inner_core(3,iglob)*hp3
           enddo
 
+          if( ATTENUATION_VAL ) then
+             ! temporary variables used for fixing attenuation in a consistent way
+
+             tempx1l_att = 0._CUSTOM_REAL
+             tempx2l_att = 0._CUSTOM_REAL
+             tempx3l_att = 0._CUSTOM_REAL
+
+             tempy1l_att = 0._CUSTOM_REAL
+             tempy2l_att = 0._CUSTOM_REAL
+             tempy3l_att = 0._CUSTOM_REAL
+
+             tempz1l_att = 0._CUSTOM_REAL
+             tempz2l_att = 0._CUSTOM_REAL
+             tempz3l_att = 0._CUSTOM_REAL
+
+             ! use first order Taylor expansion of displacement for local storage of stresses 
+             ! at this current time step, to fix attenuation in a consistent way
+             do l=1,NGLLX
+                hp1 = hprime_xx(i,l)
+                iglob = ibool(l,j,k,ispec)
+                tempx1l_att = tempx1l_att + &
+                     (displ_inner_core(1,iglob) + deltat*veloc_inner_core(1,iglob))*hp1
+                tempy1l_att = tempy1l_att + &
+                     (displ_inner_core(2,iglob) + deltat*veloc_inner_core(2,iglob))*hp1
+                tempz1l_att = tempz1l_att + &
+                     (displ_inner_core(3,iglob) + deltat*veloc_inner_core(3,iglob))*hp1
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLY
+                hp2 = hprime_yy(j,l)
+                iglob = ibool(i,l,k,ispec)
+                tempx2l_att = tempx2l_att + &
+                     (displ_inner_core(1,iglob) + deltat*veloc_inner_core(1,iglob))*hp2
+                tempy2l_att = tempy2l_att + &
+                     (displ_inner_core(2,iglob) + deltat*veloc_inner_core(2,iglob))*hp2
+                tempz2l_att = tempz2l_att + &
+                     (displ_inner_core(3,iglob) + deltat*veloc_inner_core(3,iglob))*hp2
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLZ
+                hp3 = hprime_zz(k,l)
+                iglob = ibool(i,j,l,ispec)
+                tempx3l_att = tempx3l_att + &
+                     (displ_inner_core(1,iglob) + deltat*veloc_inner_core(1,iglob))*hp3
+                tempy3l_att = tempy3l_att + &
+                     (displ_inner_core(2,iglob) + deltat*veloc_inner_core(2,iglob))*hp3
+                tempz3l_att = tempz3l_att + &
+                     (displ_inner_core(3,iglob) + deltat*veloc_inner_core(3,iglob))*hp3
+             enddo
+          endif
+
 !         get derivatives of ux, uy and uz with respect to x, y and z
 
           xixl = xix(i,j,k,ispec)
@@ -233,19 +300,54 @@
           duzdxl_plus_duxdzl = duzdxl + duxdzl
           duzdyl_plus_duydzl = duzdyl + duydzl
 
-! compute deviatoric strain
-          if (COMPUTE_AND_STORE_STRAIN) then
-            if(NSPEC_INNER_CORE_STRAIN_ONLY == 1) then
-              ispec_strain = 1
-            else
-              ispec_strain = ispec
-            endif
-            epsilon_trace_over_3(i,j,k,ispec_strain) = ONE_THIRD * (duxdxl + duydyl + duzdzl)
-            epsilondev_loc(1,i,j,k) = duxdxl - epsilon_trace_over_3(i,j,k,ispec_strain)
-            epsilondev_loc(2,i,j,k) = duydyl - epsilon_trace_over_3(i,j,k,ispec_strain)
-            epsilondev_loc(3,i,j,k) = 0.5 * duxdyl_plus_duydxl
-            epsilondev_loc(4,i,j,k) = 0.5 * duzdxl_plus_duxdzl
-            epsilondev_loc(5,i,j,k) = 0.5 * duzdyl_plus_duydzl
+          if( ATTENUATION_VAL ) then
+             ! temporary variables used for fixing attenuation in a consistent way
+             duxdxl_att = xixl*tempx1l_att + etaxl*tempx2l_att + gammaxl*tempx3l_att
+             duxdyl_att = xiyl*tempx1l_att + etayl*tempx2l_att + gammayl*tempx3l_att
+             duxdzl_att = xizl*tempx1l_att + etazl*tempx2l_att + gammazl*tempx3l_att
+
+             duydxl_att = xixl*tempy1l_att + etaxl*tempy2l_att + gammaxl*tempy3l_att
+             duydyl_att = xiyl*tempy1l_att + etayl*tempy2l_att + gammayl*tempy3l_att
+             duydzl_att = xizl*tempy1l_att + etazl*tempy2l_att + gammazl*tempy3l_att
+
+             duzdxl_att = xixl*tempz1l_att + etaxl*tempz2l_att + gammaxl*tempz3l_att
+             duzdyl_att = xiyl*tempz1l_att + etayl*tempz2l_att + gammayl*tempz3l_att
+             duzdzl_att = xizl*tempz1l_att + etazl*tempz2l_att + gammazl*tempz3l_att
+
+             ! precompute some sums to save CPU time
+             duxdyl_plus_duydxl_att = duxdyl_att + duydxl_att
+             duzdxl_plus_duxdzl_att = duzdxl_att + duxdzl_att
+             duzdyl_plus_duydzl_att = duzdyl_att + duydzl_att
+
+             ! compute deviatoric strain
+             if (COMPUTE_AND_STORE_STRAIN) then
+                if(NSPEC_INNER_CORE_STRAIN_ONLY == 1) then
+                   ispec_strain = 1
+                else
+                   ispec_strain = ispec
+                endif
+                epsilon_trace_over_3(i,j,k,ispec_strain) = ONE_THIRD * (duxdxl_att + duydyl_att + duzdzl_att)
+                epsilondev_loc(1,i,j,k) = duxdxl_att - epsilon_trace_over_3(i,j,k,ispec_strain)
+                epsilondev_loc(2,i,j,k) = duydyl_att - epsilon_trace_over_3(i,j,k,ispec_strain)
+                epsilondev_loc(3,i,j,k) = 0.5 * duxdyl_plus_duydxl_att
+                epsilondev_loc(4,i,j,k) = 0.5 * duzdxl_plus_duxdzl_att
+                epsilondev_loc(5,i,j,k) = 0.5 * duzdyl_plus_duydzl_att
+             endif
+          else
+             ! compute deviatoric strain
+             if (COMPUTE_AND_STORE_STRAIN) then
+                if(NSPEC_INNER_CORE_STRAIN_ONLY == 1) then
+                   ispec_strain = 1
+                else
+                   ispec_strain = ispec
+                endif
+                epsilon_trace_over_3(i,j,k,ispec_strain) = ONE_THIRD * (duxdxl + duydyl + duzdzl)
+                epsilondev_loc(1,i,j,k) = duxdxl - epsilon_trace_over_3(i,j,k,ispec_strain)
+                epsilondev_loc(2,i,j,k) = duydyl - epsilon_trace_over_3(i,j,k,ispec_strain)
+                epsilondev_loc(3,i,j,k) = 0.5 * duxdyl_plus_duydxl
+                epsilondev_loc(4,i,j,k) = 0.5 * duzdxl_plus_duxdzl
+                epsilondev_loc(5,i,j,k) = 0.5 * duzdyl_plus_duydzl
+             endif
           endif
 
           if(ATTENUATION_VAL) then

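The duplicated COMPUTE_AND_STORE_STRAIN branches above differ only in whether the gradients come from displ or from the predicted displ + deltat*veloc field; the deviatoric-strain bookkeeping itself is unchanged. A compact sketch of that bookkeeping (illustrative names and default reals instead of CUSTOM_REAL):

    program strain_sketch
      implicit none
      real :: eps_dev(5), eps_trace
      ! dummy gradient values, purely illustrative
      call deviatoric_strain(0.10, 0.20, -0.30, 0.05, 0.02, 0.01, eps_dev, eps_trace)
      print *, 'trace/3 = ', eps_trace, '  dev = ', eps_dev
    contains
      subroutine deviatoric_strain(duxdxl,duydyl,duzdzl, &
                                   duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl, &
                                   eps_dev,eps_trace)
        real, intent(in)  :: duxdxl,duydyl,duzdzl
        real, intent(in)  :: duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
        real, intent(out) :: eps_dev(5), eps_trace
        real, parameter :: ONE_THIRD = 1.0/3.0
        eps_trace  = ONE_THIRD * (duxdxl + duydyl + duzdzl)
        eps_dev(1) = duxdxl - eps_trace           ! xx
        eps_dev(2) = duydyl - eps_trace           ! yy
        eps_dev(3) = 0.5 * duxdyl_plus_duydxl     ! xy
        eps_dev(4) = 0.5 * duzdxl_plus_duxdzl     ! xz
        eps_dev(5) = 0.5 * duzdyl_plus_duydzl     ! yz
      end subroutine deviatoric_strain
    end program strain_sketch
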
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core_Dev.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core_Dev.F90	2012-04-17 18:47:43 UTC (rev 19953)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core_Dev.F90	2012-04-19 11:40:25 UTC (rev 19954)
@@ -35,7 +35,10 @@
 !          the original routines are commented with "! way 1", the hand-optimized routines with  "! way 2"
 
   subroutine compute_forces_inner_core_Dev( NSPEC,NGLOB,NSPEC_ATT, &
-                                            displ_inner_core,accel_inner_core, &
+                                            deltat, &
+                                            displ_inner_core, &
+                                            veloc_inner_core, &
+                                            accel_inner_core, &
                                             phase_is_inner, &
                                             R_xx,R_yy,R_xy,R_xz,R_yz, &
                                             epsilondev_xx,epsilondev_yy,epsilondev_xy, &
@@ -71,8 +74,13 @@
 
   integer :: NSPEC,NGLOB,NSPEC_ATT
 
+  ! time step
+  real(kind=CUSTOM_REAL) deltat
+
   ! displacement and acceleration
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: displ_inner_core,accel_inner_core
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: displ_inner_core
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: veloc_inner_core
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: accel_inner_core
 
   ! for attenuation
   ! memory variables R_ij are stored at the local rather than global level
@@ -117,6 +125,19 @@
   equivalence(newtempy1,E2_m1_m2_5points)
   equivalence(newtempz1,E3_m1_m2_5points)
 
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
+    tempx1_att,tempx2_att,tempx3_att,tempy1_att,tempy2_att,tempy3_att,tempz1_att,tempz2_att,tempz3_att
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc_att,dummyy_loc_att,dummyz_loc_att
+  real(kind=CUSTOM_REAL), dimension(NGLLX,m2) :: B1_m1_m2_5points_att,B2_m1_m2_5points_att,B3_m1_m2_5points_att
+  real(kind=CUSTOM_REAL), dimension(m1,m2) :: C1_m1_m2_5points_att,C2_m1_m2_5points_att,C3_m1_m2_5points_att
+
+  equivalence(dummyx_loc_att,B1_m1_m2_5points_att)
+  equivalence(dummyy_loc_att,B2_m1_m2_5points_att)
+  equivalence(dummyz_loc_att,B3_m1_m2_5points_att)
+  equivalence(tempx1_att,C1_m1_m2_5points_att)
+  equivalence(tempy1_att,C2_m1_m2_5points_att)
+  equivalence(tempz1_att,C3_m1_m2_5points_att)
+
   real(kind=CUSTOM_REAL), dimension(m2,NGLLX) :: &
     A1_mxm_m2_m1_5points,A2_mxm_m2_m1_5points,A3_mxm_m2_m1_5points
   real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
@@ -134,6 +155,18 @@
   equivalence(newtempy3,E2_mxm_m2_m1_5points)
   equivalence(newtempz3,E3_mxm_m2_m1_5points)
 
+  real(kind=CUSTOM_REAL), dimension(m2,NGLLX) :: &
+    A1_mxm_m2_m1_5points_att,A2_mxm_m2_m1_5points_att,A3_mxm_m2_m1_5points_att
+  real(kind=CUSTOM_REAL), dimension(m2,m1) :: &
+    C1_mxm_m2_m1_5points_att,C2_mxm_m2_m1_5points_att,C3_mxm_m2_m1_5points_att
+
+  equivalence(dummyx_loc_att,A1_mxm_m2_m1_5points_att)
+  equivalence(dummyy_loc_att,A2_mxm_m2_m1_5points_att)
+  equivalence(dummyz_loc_att,A3_mxm_m2_m1_5points_att)
+  equivalence(tempx3_att,C1_mxm_m2_m1_5points_att)
+  equivalence(tempy3_att,C2_mxm_m2_m1_5points_att)
+  equivalence(tempz3_att,C3_mxm_m2_m1_5points_att)
+
   real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sum_terms
   real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
 
@@ -143,6 +176,9 @@
   real(kind=CUSTOM_REAL) duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl
   real(kind=CUSTOM_REAL) duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
 
+  real(kind=CUSTOM_REAL) duxdxl_att,duxdyl_att,duxdzl_att,duydxl_att,duydyl_att,duydzl_att,duzdxl_att,duzdyl_att,duzdzl_att
+  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl_att,duzdxl_plus_duxdzl_att,duzdyl_plus_duydzl_att
+
   real(kind=CUSTOM_REAL) sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz
 
   real(kind=CUSTOM_REAL) fac1,fac2,fac3,templ
@@ -202,6 +238,7 @@
 
       do k=1,NGLLZ
         do j=1,NGLLY
+
 #ifdef _HANDOPT
 ! way 2:
         ! since we know that NGLLX = 5, this should help pipelining
@@ -238,74 +275,200 @@
 #endif
         enddo
       enddo
+      
+      if( ATTENUATION_VAL ) then
+         ! use first order Taylor expansion of displacement for local storage of stresses 
+         ! at this current time step, to fix attenuation in a consistent way
 
+         do k=1,NGLLZ
+            do j=1,NGLLY
+
+#ifdef _HANDOPT
+               ! way 2:
+               ! since we know that NGLLX = 5, this should help pipelining
+               iglobv5(:) = ibool(:,j,k,ispec)
+
+               dummyx_loc_att(1,j,k) = displ_inner_core(1,iglobv5(1)) + deltat*veloc_inner_core(1,iglobv5(1))
+               dummyy_loc_att(1,j,k) = displ_inner_core(2,iglobv5(1)) + deltat*veloc_inner_core(2,iglobv5(1))
+               dummyz_loc_att(1,j,k) = displ_inner_core(3,iglobv5(1)) + deltat*veloc_inner_core(3,iglobv5(1))
+
+               dummyx_loc_att(2,j,k) = displ_inner_core(1,iglobv5(2)) + deltat*veloc_inner_core(1,iglobv5(2))
+               dummyy_loc_att(2,j,k) = displ_inner_core(2,iglobv5(2)) + deltat*veloc_inner_core(2,iglobv5(2))
+               dummyz_loc_att(2,j,k) = displ_inner_core(3,iglobv5(2)) + deltat*veloc_inner_core(3,iglobv5(2))
+
+               dummyx_loc_att(3,j,k) = displ_inner_core(1,iglobv5(3)) + deltat*veloc_inner_core(1,iglobv5(3))
+               dummyy_loc_att(3,j,k) = displ_inner_core(2,iglobv5(3)) + deltat*veloc_inner_core(2,iglobv5(3))
+               dummyz_loc_att(3,j,k) = displ_inner_core(3,iglobv5(3)) + deltat*veloc_inner_core(3,iglobv5(3))
+
+               dummyx_loc_att(4,j,k) = displ_inner_core(1,iglobv5(4)) + deltat*veloc_inner_core(1,iglobv5(4))
+               dummyy_loc_att(4,j,k) = displ_inner_core(2,iglobv5(4)) + deltat*veloc_inner_core(2,iglobv5(4))
+               dummyz_loc_att(4,j,k) = displ_inner_core(3,iglobv5(4)) + deltat*veloc_inner_core(3,iglobv5(4))
+
+               dummyx_loc_att(5,j,k) = displ_inner_core(1,iglobv5(5)) + deltat*veloc_inner_core(1,iglobv5(5))
+               dummyy_loc_att(5,j,k) = displ_inner_core(2,iglobv5(5)) + deltat*veloc_inner_core(2,iglobv5(5))
+               dummyz_loc_att(5,j,k) = displ_inner_core(3,iglobv5(5)) + deltat*veloc_inner_core(3,iglobv5(5))
+
+#else
+               ! way 1:
+               do i=1,NGLLX
+                  iglob1 = ibool(i,j,k,ispec)
+                  dummyx_loc_att(i,j,k) = displ_inner_core(1,iglob1) + deltat*veloc_inner_core(1,iglob1)
+                  dummyy_loc_att(i,j,k) = displ_inner_core(2,iglob1) + deltat*veloc_inner_core(2,iglob1)
+                  dummyz_loc_att(i,j,k) = displ_inner_core(3,iglob1) + deltat*veloc_inner_core(3,iglob1)
+               enddo
+
+#endif
+            enddo
+         enddo
+      endif
+
       do j=1,m2
-        do i=1,m1
-          C1_m1_m2_5points(i,j) = hprime_xx(i,1)*B1_m1_m2_5points(1,j) + &
-                                hprime_xx(i,2)*B1_m1_m2_5points(2,j) + &
-                                hprime_xx(i,3)*B1_m1_m2_5points(3,j) + &
-                                hprime_xx(i,4)*B1_m1_m2_5points(4,j) + &
-                                hprime_xx(i,5)*B1_m1_m2_5points(5,j)
+         do i=1,m1
+            C1_m1_m2_5points(i,j) = hprime_xx(i,1)*B1_m1_m2_5points(1,j) + &
+                 hprime_xx(i,2)*B1_m1_m2_5points(2,j) + &
+                 hprime_xx(i,3)*B1_m1_m2_5points(3,j) + &
+                 hprime_xx(i,4)*B1_m1_m2_5points(4,j) + &
+                 hprime_xx(i,5)*B1_m1_m2_5points(5,j)
 
-          C2_m1_m2_5points(i,j) = hprime_xx(i,1)*B2_m1_m2_5points(1,j) + &
-                                hprime_xx(i,2)*B2_m1_m2_5points(2,j) + &
-                                hprime_xx(i,3)*B2_m1_m2_5points(3,j) + &
-                                hprime_xx(i,4)*B2_m1_m2_5points(4,j) + &
-                                hprime_xx(i,5)*B2_m1_m2_5points(5,j)
+            C2_m1_m2_5points(i,j) = hprime_xx(i,1)*B2_m1_m2_5points(1,j) + &
+                 hprime_xx(i,2)*B2_m1_m2_5points(2,j) + &
+                 hprime_xx(i,3)*B2_m1_m2_5points(3,j) + &
+                 hprime_xx(i,4)*B2_m1_m2_5points(4,j) + &
+                 hprime_xx(i,5)*B2_m1_m2_5points(5,j)
 
-          C3_m1_m2_5points(i,j) = hprime_xx(i,1)*B3_m1_m2_5points(1,j) + &
-                                hprime_xx(i,2)*B3_m1_m2_5points(2,j) + &
-                                hprime_xx(i,3)*B3_m1_m2_5points(3,j) + &
-                                hprime_xx(i,4)*B3_m1_m2_5points(4,j) + &
-                                hprime_xx(i,5)*B3_m1_m2_5points(5,j)
-        enddo
+            C3_m1_m2_5points(i,j) = hprime_xx(i,1)*B3_m1_m2_5points(1,j) + &
+                 hprime_xx(i,2)*B3_m1_m2_5points(2,j) + &
+                 hprime_xx(i,3)*B3_m1_m2_5points(3,j) + &
+                 hprime_xx(i,4)*B3_m1_m2_5points(4,j) + &
+                 hprime_xx(i,5)*B3_m1_m2_5points(5,j)
+         enddo
       enddo
+
+      if( ATTENUATION_VAL ) then
+         ! temporary variables used for fixing attenuation in a consistent way
+         do j=1,m2
+            do i=1,m1
+               C1_m1_m2_5points_att(i,j) = hprime_xx(i,1)*B1_m1_m2_5points_att(1,j) + &
+                    hprime_xx(i,2)*B1_m1_m2_5points_att(2,j) + &
+                    hprime_xx(i,3)*B1_m1_m2_5points_att(3,j) + &
+                    hprime_xx(i,4)*B1_m1_m2_5points_att(4,j) + &
+                    hprime_xx(i,5)*B1_m1_m2_5points_att(5,j)
+
+               C2_m1_m2_5points_att(i,j) = hprime_xx(i,1)*B2_m1_m2_5points_att(1,j) + &
+                    hprime_xx(i,2)*B2_m1_m2_5points_att(2,j) + &
+                    hprime_xx(i,3)*B2_m1_m2_5points_att(3,j) + &
+                    hprime_xx(i,4)*B2_m1_m2_5points_att(4,j) + &
+                    hprime_xx(i,5)*B2_m1_m2_5points_att(5,j)
+
+               C3_m1_m2_5points_att(i,j) = hprime_xx(i,1)*B3_m1_m2_5points_att(1,j) + &
+                    hprime_xx(i,2)*B3_m1_m2_5points_att(2,j) + &
+                    hprime_xx(i,3)*B3_m1_m2_5points_att(3,j) + &
+                    hprime_xx(i,4)*B3_m1_m2_5points_att(4,j) + &
+                    hprime_xx(i,5)*B3_m1_m2_5points_att(5,j)
+            enddo
+         enddo
+      endif
+
       do j=1,m1
-        do i=1,m1
-          ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
-          do k = 1,NGLLX
-            tempx2(i,j,k) = dummyx_loc(i,1,k)*hprime_xxT(1,j) + &
-                          dummyx_loc(i,2,k)*hprime_xxT(2,j) + &
-                          dummyx_loc(i,3,k)*hprime_xxT(3,j) + &
-                          dummyx_loc(i,4,k)*hprime_xxT(4,j) + &
-                          dummyx_loc(i,5,k)*hprime_xxT(5,j)
+         do i=1,m1
+            ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
+            do k = 1,NGLLX
+               tempx2(i,j,k) = dummyx_loc(i,1,k)*hprime_xxT(1,j) + &
+                    dummyx_loc(i,2,k)*hprime_xxT(2,j) + &
+                    dummyx_loc(i,3,k)*hprime_xxT(3,j) + &
+                    dummyx_loc(i,4,k)*hprime_xxT(4,j) + &
+                    dummyx_loc(i,5,k)*hprime_xxT(5,j)
 
-            tempy2(i,j,k) = dummyy_loc(i,1,k)*hprime_xxT(1,j) + &
-                          dummyy_loc(i,2,k)*hprime_xxT(2,j) + &
-                          dummyy_loc(i,3,k)*hprime_xxT(3,j) + &
-                          dummyy_loc(i,4,k)*hprime_xxT(4,j) + &
-                          dummyy_loc(i,5,k)*hprime_xxT(5,j)
+               tempy2(i,j,k) = dummyy_loc(i,1,k)*hprime_xxT(1,j) + &
+                    dummyy_loc(i,2,k)*hprime_xxT(2,j) + &
+                    dummyy_loc(i,3,k)*hprime_xxT(3,j) + &
+                    dummyy_loc(i,4,k)*hprime_xxT(4,j) + &
+                    dummyy_loc(i,5,k)*hprime_xxT(5,j)
 
-            tempz2(i,j,k) = dummyz_loc(i,1,k)*hprime_xxT(1,j) + &
-                          dummyz_loc(i,2,k)*hprime_xxT(2,j) + &
-                          dummyz_loc(i,3,k)*hprime_xxT(3,j) + &
-                          dummyz_loc(i,4,k)*hprime_xxT(4,j) + &
-                          dummyz_loc(i,5,k)*hprime_xxT(5,j)
-          enddo
-        enddo
+               tempz2(i,j,k) = dummyz_loc(i,1,k)*hprime_xxT(1,j) + &
+                    dummyz_loc(i,2,k)*hprime_xxT(2,j) + &
+                    dummyz_loc(i,3,k)*hprime_xxT(3,j) + &
+                    dummyz_loc(i,4,k)*hprime_xxT(4,j) + &
+                    dummyz_loc(i,5,k)*hprime_xxT(5,j)
+            enddo
+         enddo
       enddo
+
+      if( ATTENUATION_VAL ) then
+         ! temporary variables used for fixing attenuation in a consistent way
+         do j=1,m1
+            do i=1,m1
+               ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
+               do k = 1,NGLLX
+                  tempx2_att(i,j,k) = dummyx_loc_att(i,1,k)*hprime_xxT(1,j) + &
+                       dummyx_loc_att(i,2,k)*hprime_xxT(2,j) + &
+                       dummyx_loc_att(i,3,k)*hprime_xxT(3,j) + &
+                       dummyx_loc_att(i,4,k)*hprime_xxT(4,j) + &
+                       dummyx_loc_att(i,5,k)*hprime_xxT(5,j)
+
+                  tempy2_att(i,j,k) = dummyy_loc_att(i,1,k)*hprime_xxT(1,j) + &
+                       dummyy_loc_att(i,2,k)*hprime_xxT(2,j) + &
+                       dummyy_loc_att(i,3,k)*hprime_xxT(3,j) + &
+                       dummyy_loc_att(i,4,k)*hprime_xxT(4,j) + &
+                       dummyy_loc_att(i,5,k)*hprime_xxT(5,j)
+
+                  tempz2_att(i,j,k) = dummyz_loc_att(i,1,k)*hprime_xxT(1,j) + &
+                       dummyz_loc_att(i,2,k)*hprime_xxT(2,j) + &
+                       dummyz_loc_att(i,3,k)*hprime_xxT(3,j) + &
+                       dummyz_loc_att(i,4,k)*hprime_xxT(4,j) + &
+                       dummyz_loc_att(i,5,k)*hprime_xxT(5,j)
+               enddo
+            enddo
+         enddo
+      endif
+
       do j=1,m1
-        do i=1,m2
-          C1_mxm_m2_m1_5points(i,j) = A1_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
-                                    A1_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
-                                    A1_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
-                                    A1_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
-                                    A1_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
+         do i=1,m2
+            C1_mxm_m2_m1_5points(i,j) = A1_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
+                 A1_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
+                 A1_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
+                 A1_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
+                 A1_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
 
-          C2_mxm_m2_m1_5points(i,j) = A2_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
-                                    A2_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
-                                    A2_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
-                                    A2_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
-                                    A2_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
+            C2_mxm_m2_m1_5points(i,j) = A2_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
+                 A2_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
+                 A2_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
+                 A2_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
+                 A2_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
 
-          C3_mxm_m2_m1_5points(i,j) = A3_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
-                                    A3_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
-                                    A3_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
-                                    A3_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
-                                    A3_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
-        enddo
+            C3_mxm_m2_m1_5points(i,j) = A3_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
+                 A3_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
+                 A3_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
+                 A3_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
+                 A3_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
+         enddo
       enddo
 
+      if( ATTENUATION_VAL ) then
+         ! temporary variables used for fixing attenuation in a consistent way
+         do j=1,m1
+            do i=1,m2
+               C1_mxm_m2_m1_5points_att(i,j) = A1_mxm_m2_m1_5points_att(i,1)*hprime_xxT(1,j) + &
+                    A1_mxm_m2_m1_5points_att(i,2)*hprime_xxT(2,j) + &
+                    A1_mxm_m2_m1_5points_att(i,3)*hprime_xxT(3,j) + &
+                    A1_mxm_m2_m1_5points_att(i,4)*hprime_xxT(4,j) + &
+                    A1_mxm_m2_m1_5points_att(i,5)*hprime_xxT(5,j)
+
+               C2_mxm_m2_m1_5points_att(i,j) = A2_mxm_m2_m1_5points_att(i,1)*hprime_xxT(1,j) + &
+                    A2_mxm_m2_m1_5points_att(i,2)*hprime_xxT(2,j) + &
+                    A2_mxm_m2_m1_5points_att(i,3)*hprime_xxT(3,j) + &
+                    A2_mxm_m2_m1_5points_att(i,4)*hprime_xxT(4,j) + &
+                    A2_mxm_m2_m1_5points_att(i,5)*hprime_xxT(5,j)
+
+               C3_mxm_m2_m1_5points_att(i,j) = A3_mxm_m2_m1_5points_att(i,1)*hprime_xxT(1,j) + &
+                    A3_mxm_m2_m1_5points_att(i,2)*hprime_xxT(2,j) + &
+                    A3_mxm_m2_m1_5points_att(i,3)*hprime_xxT(3,j) + &
+                    A3_mxm_m2_m1_5points_att(i,4)*hprime_xxT(4,j) + &
+                    A3_mxm_m2_m1_5points_att(i,5)*hprime_xxT(5,j)
+            enddo
+         enddo
+      endif
+
       do k=1,NGLLZ
         do j=1,NGLLY
           do i=1,NGLLX
@@ -346,20 +509,56 @@
             duzdxl_plus_duxdzl = duzdxl + duxdzl
             duzdyl_plus_duydzl = duzdyl + duydzl
 
-            ! compute deviatoric strain
-            if (COMPUTE_AND_STORE_STRAIN) then
-              if(NSPEC_INNER_CORE_STRAIN_ONLY == 1) then
-                ispec_strain = 1
-              else
-                ispec_strain = ispec
-              endif
-              templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
-              epsilon_trace_over_3(i,j,k,ispec_strain) = templ
-              epsilondev_loc(1,i,j,k) = duxdxl - templ
-              epsilondev_loc(2,i,j,k) = duydyl - templ
-              epsilondev_loc(3,i,j,k) = 0.5 * duxdyl_plus_duydxl
-              epsilondev_loc(4,i,j,k) = 0.5 * duzdxl_plus_duxdzl
-              epsilondev_loc(5,i,j,k) = 0.5 * duzdyl_plus_duydzl
+            if( ATTENUATION_VAL ) then
+               ! temporary variables used for fixing attenuation in a consistent way
+               duxdxl_att = xixl*tempx1_att(i,j,k) + etaxl*tempx2_att(i,j,k) + gammaxl*tempx3_att(i,j,k)
+               duxdyl_att = xiyl*tempx1_att(i,j,k) + etayl*tempx2_att(i,j,k) + gammayl*tempx3_att(i,j,k)
+               duxdzl_att = xizl*tempx1_att(i,j,k) + etazl*tempx2_att(i,j,k) + gammazl*tempx3_att(i,j,k)
+
+               duydxl_att = xixl*tempy1_att(i,j,k) + etaxl*tempy2_att(i,j,k) + gammaxl*tempy3_att(i,j,k)
+               duydyl_att = xiyl*tempy1_att(i,j,k) + etayl*tempy2_att(i,j,k) + gammayl*tempy3_att(i,j,k)
+               duydzl_att = xizl*tempy1_att(i,j,k) + etazl*tempy2_att(i,j,k) + gammazl*tempy3_att(i,j,k)
+
+               duzdxl_att = xixl*tempz1_att(i,j,k) + etaxl*tempz2_att(i,j,k) + gammaxl*tempz3_att(i,j,k)
+               duzdyl_att = xiyl*tempz1_att(i,j,k) + etayl*tempz2_att(i,j,k) + gammayl*tempz3_att(i,j,k)
+               duzdzl_att = xizl*tempz1_att(i,j,k) + etazl*tempz2_att(i,j,k) + gammazl*tempz3_att(i,j,k)
+
+               ! precompute some sums to save CPU time
+               duxdyl_plus_duydxl_att = duxdyl_att + duydxl_att
+               duzdxl_plus_duxdzl_att = duzdxl_att + duxdzl_att
+               duzdyl_plus_duydzl_att = duzdyl_att + duydzl_att
+
+               ! compute deviatoric strain
+               if (COMPUTE_AND_STORE_STRAIN) then
+                  if(NSPEC_INNER_CORE_STRAIN_ONLY == 1) then
+                     ispec_strain = 1
+                  else
+                     ispec_strain = ispec
+                  endif
+                  templ = ONE_THIRD * (duxdxl_att + duydyl_att + duzdzl_att)
+                  epsilon_trace_over_3(i,j,k,ispec_strain) = templ
+                  epsilondev_loc(1,i,j,k) = duxdxl_att - templ
+                  epsilondev_loc(2,i,j,k) = duydyl_att - templ
+                  epsilondev_loc(3,i,j,k) = 0.5 * duxdyl_plus_duydxl_att
+                  epsilondev_loc(4,i,j,k) = 0.5 * duzdxl_plus_duxdzl_att
+                  epsilondev_loc(5,i,j,k) = 0.5 * duzdyl_plus_duydzl_att
+               endif
+            else
+               ! compute deviatoric strain
+               if (COMPUTE_AND_STORE_STRAIN) then
+                  if(NSPEC_INNER_CORE_STRAIN_ONLY == 1) then
+                     ispec_strain = 1
+                  else
+                     ispec_strain = ispec
+                  endif
+                  templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
+                  epsilon_trace_over_3(i,j,k,ispec_strain) = templ
+                  epsilondev_loc(1,i,j,k) = duxdxl - templ
+                  epsilondev_loc(2,i,j,k) = duydyl - templ
+                  epsilondev_loc(3,i,j,k) = 0.5 * duxdyl_plus_duydxl
+                  epsilondev_loc(4,i,j,k) = 0.5 * duzdxl_plus_duxdzl
+                  epsilondev_loc(5,i,j,k) = 0.5 * duzdyl_plus_duydzl
+               endif
             endif
 
             if(ATTENUATION_VAL) then

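The new *_att work arrays in the _Dev routine are equivalenced onto flat matrix views (B*_m1_m2_5points_att, A*_mxm_m2_m1_5points_att, C*_..._att) so that each xi/gamma derivative above reduces to a small matrix-matrix product with hprime_xx or hprime_xxT. A stripped-down sketch of that storage trick (all names and sizes here are illustrative, not from the source):

    program equivalence_sketch
      implicit none
      integer, parameter :: m1 = 5, m2 = 25
      real :: dummy_loc(m1,m1,m1)      ! (NGLLX,NGLLY,NGLLZ)-style cube
      real :: B(m1,m2)                 ! flat (m1,m2) matrix view of the same storage
      real :: hprime(m1,m1), C(m1,m2)
      integer :: i, j
      equivalence (dummy_loc, B)       ! same 125 reals, two shapes, no copy
      call random_number(dummy_loc)
      call random_number(hprime)
      do j = 1,m2
         do i = 1,m1
            C(i,j) = hprime(i,1)*B(1,j) + hprime(i,2)*B(2,j) + hprime(i,3)*B(3,j) &
                   + hprime(i,4)*B(4,j) + hprime(i,5)*B(5,j)
         enddo
      enddo
      print *, 'C(1,1) = ', C(1,1)
    end program equivalence_sketch

Because the two views share storage, the attenuation branch can reuse the unrolled 5-point products without copying dummy*_loc_att first.
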

