[cig-commits] r19212 - in seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER: examples/waterlayered_halfspace/in_data_files src/cuda src/generate_databases src/shared src/specfem3D

danielpeter at geodynamics.org danielpeter at geodynamics.org
Wed Nov 16 17:58:40 PST 2011


Author: danielpeter
Date: 2011-11-16 17:58:40 -0800 (Wed, 16 Nov 2011)
New Revision: 19212

Added:
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/make_gravity.f90
Modified:
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/waterlayered_halfspace/in_data_files/Par_file
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/waterlayered_halfspace/in_data_files/STATIONS
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_add_sources_acoustic_cuda.cu
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_coupling_cuda.cu
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_acoustic_cuda.cu
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_elastic_cuda.cu
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_kernels_cuda.cu
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/mesh_constants_cuda.h
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_constants_cuda.h
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/create_regions_mesh.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_coupling_surfaces.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_perm_color.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/create_movie_shakemap_AVS_DX_GMT.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/read_parameter_file.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/Makefile.in
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_gradient.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_kernels.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/initialize_simulation.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D_par.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_movie_output.f90
   seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_seismograms.f90
Log:
adds gravity

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/waterlayered_halfspace/in_data_files/Par_file
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/waterlayered_halfspace/in_data_files/Par_file	2011-11-16 22:35:56 UTC (rev 19211)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/waterlayered_halfspace/in_data_files/Par_file	2011-11-17 01:58:40 UTC (rev 19212)
@@ -22,6 +22,7 @@
 ATTENUATION                     = .false.
 USE_OLSEN_ATTENUATION           = .false.
 ANISOTROPY                      = .false.
+GRAVITY                         = .false.
 
 # absorbing boundary conditions for a regional simulation
 ABSORBING_CONDITIONS            = .true.
@@ -29,7 +30,7 @@
 # save AVS or OpenDX movies
 MOVIE_SURFACE                   = .false.
 MOVIE_VOLUME                    = .false.
-NTSTEP_BETWEEN_FRAMES           = 200
+NTSTEP_BETWEEN_FRAMES           = 100
 CREATE_SHAKEMAP                 = .false.
 SAVE_DISPLACEMENT               = .false.
 USE_HIGHRES_FOR_MOVIES          = .false.

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/waterlayered_halfspace/in_data_files/STATIONS
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/waterlayered_halfspace/in_data_files/STATIONS	2011-11-16 22:35:56 UTC (rev 19211)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/examples/waterlayered_halfspace/in_data_files/STATIONS	2011-11-17 01:58:40 UTC (rev 19212)
@@ -3,11 +3,20 @@
 X30 DB 67000.00 34696.43 0.0 100.0
 X40 DB 67000.00 46660.71 0.0 100.0
 X50 DB 67000.00 58625.00 0.0 100.0
-X50 DB 67000.00 67000.00 0.0 100.0
+X55 DB 67000.00 67000.00 0.0 100.0
 X60 DB 67000.00 75375.00 0.0 100.0
 X70 DB 67000.00 87339.29 0.0 100.0
 X80 DB 67000.00 99303.57 0.0 100.0
 X90 DB 67000.00 111269.86 0.0 100.0
 X1 DB 67000.00 0.0 0.0 100.0
 X2 DB 67000.00 67000.0 0.0 25000.0
-X3 DB 67000.00 134000.0 0.0 100.0
\ No newline at end of file
+X3 DB 67000.00 134000.0 0.0 100.0
+X4 DB 67000.00 67000.0 0.0 3500.0
+X5 DB 67000.00 67000.0 0.0 3000.2
+X6 DB 67000.00 67000.0 0.0 3000.1
+X7 DB 67000.00 67000.0 0.0 2999.9
+X8 DB 67000.00 67000.0 0.0 2999.8
+Y1 DB 67000.00 58625.0 0.0 2999.9
+Y2 DB 67000.00 58625.0 0.0 3000.1
+Y3 DB 67000.00 46660.71 0.0 2999.9
+Y4 DB 67000.00 46660.71 0.0 3000.1

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_add_sources_acoustic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_add_sources_acoustic_cuda.cu	2011-11-16 22:35:56 UTC (rev 19211)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_add_sources_acoustic_cuda.cu	2011-11-17 01:58:40 UTC (rev 19212)
@@ -234,7 +234,7 @@
         int i = threadIdx.x;
         int j = threadIdx.y;
         int k = threadIdx.z;
-        
+
         int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-1;
 
         //kappal = kappastore[INDEX4(5,5,5,i,j,k,ispec)];
@@ -251,10 +251,10 @@
         //
         // note: we take the first component of the adj_sourcearrays
         //          the idea is to have e.g. a pressure source, where all 3 components would be the same
-        realw stf = adj_sourcearrays[INDEX5(5,5,5,3,i,j,k,0,irec_local)]; // / kappal 
-                                            
+        realw stf = adj_sourcearrays[INDEX5(5,5,5,3,i,j,k,0,irec_local)]; // / kappal
+
         atomicAdd(&potential_dot_dot_acoustic[iglob],stf);
-        
+
                   //+adj_sourcearrays[INDEX6(nadj_rec_local,NTSTEP_BETWEEN_ADJSRC,3,5,5,
                   //                         pre_computed_irec_local_index[irec],pre_computed_index-1,
                   //                         0,i,j,k)] // / kappal

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_coupling_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_coupling_cuda.cu	2011-11-16 22:35:56 UTC (rev 19211)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_coupling_cuda.cu	2011-11-17 01:58:40 UTC (rev 19212)
@@ -190,7 +190,11 @@
                                                     realw* coupling_ac_el_jacobian2Dw,
                                                     int* ibool,
                                                     int* ispec_is_inner,
-                                                    int phase_is_inner) {
+                                                    int phase_is_inner,
+                                                    int gravity,
+                                                    realw* minus_g,
+                                                    realw* rhostore,
+                                                    realw* displ) {
 
   int igll = threadIdx.x;
   int iface = blockIdx.x + gridDim.x*blockIdx.y;
@@ -199,6 +203,7 @@
   realw pressure;
   realw nx,ny,nz;
   realw jacobianw;
+  realw rhol;
 
   if( iface < num_coupling_ac_el_faces){
 
@@ -216,10 +221,8 @@
       k = coupling_ac_el_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)] - 1;
       iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)]-  1;
 
-      // acoustic pressure on global point
-      pressure = - potential_dot_dot_acoustic[iglob];
-
       // gets associated normal on GLL point
+      // note: normal points away from acoustic element
       nx = coupling_ac_el_normal[INDEX3(NDIM,NGLL2,0,igll,iface)]; // (1,igll,iface)
       ny = coupling_ac_el_normal[INDEX3(NDIM,NGLL2,1,igll,iface)]; // (2,igll,iface)
       nz = coupling_ac_el_normal[INDEX3(NDIM,NGLL2,2,igll,iface)]; // (3,igll,iface)
@@ -227,6 +230,23 @@
       // gets associated, weighted jacobian
       jacobianw = coupling_ac_el_jacobian2Dw[INDEX2(NGLL2,igll,iface)];
 
+      // acoustic pressure on global point
+      if( gravity ){
+        // takes density (from acoustic? element)
+        rhol = rhostore[INDEX4_PADDED(NGLLX,NGLLX,NGLLX,i,j,k,ispec)];
+
+        // note: uses potential chi such that displacement s = grad(chi),
+        //         pressure becomes: p = - kappa ( div( s ) ) = rho ( - dot_dot_chi + g * s )
+        //  g only acting in negative z-direction
+        // daniel: TODO - check coupling would be displ * nz  correct?
+        pressure = rhol*( - potential_dot_dot_acoustic[iglob]
+                         + minus_g[iglob] * displ[iglob*3+2] );
+
+      }else{
+        // no gravity: uses potential chi such that displacement s = 1/rho grad(chi)
+        pressure = - potential_dot_dot_acoustic[iglob];
+      }
+
       // continuity of displacement and pressure on global point
       //
       // note: newark time scheme together with definition of scalar potential:
@@ -284,7 +304,11 @@
                                                        mp->d_coupling_ac_el_jacobian2Dw,
                                                        mp->d_ibool,
                                                        mp->d_ispec_is_inner,
-                                                       phase_is_inner);
+                                                       phase_is_inner,
+                                                       mp->gravity,
+                                                       mp->d_minus_g,
+                                                       mp->d_rhostore,
+                                                       mp->d_displ);
 
   //  adjoint simulations
   if (SIMULATION_TYPE == 3 ){
@@ -297,7 +321,11 @@
                                                          mp->d_coupling_ac_el_jacobian2Dw,
                                                          mp->d_ibool,
                                                          mp->d_ispec_is_inner,
-                                                         phase_is_inner);
+                                                         phase_is_inner,
+                                                         mp->gravity,
+                                                         mp->d_minus_g,
+                                                         mp->d_rhostore,
+                                                         mp->d_b_displ);
 
   }
 

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_acoustic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_acoustic_cuda.cu	2011-11-16 22:35:56 UTC (rev 19211)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_acoustic_cuda.cu	2011-11-17 01:58:40 UTC (rev 19212)
@@ -256,7 +256,11 @@
                                        realw* hprime_xx, realw* hprime_yy, realw* hprime_zz,
                                        realw* hprimewgll_xx, realw* hprimewgll_yy, realw* hprimewgll_zz,
                                        realw* wgllwgll_xy,realw* wgllwgll_xz,realw* wgllwgll_yz,
-                                       realw* d_rhostore){
+                                       realw* d_rhostore,
+                                       int gravity,
+                                       realw* minus_g,
+                                       realw* d_kappastore,
+                                       realw* wgll_cube){
 
   int bx = blockIdx.y*gridDim.x+blockIdx.x;
   int tx = threadIdx.x;
@@ -268,64 +272,68 @@
   int J = ((tx-K*NGLL2)/NGLLX);
   int I = (tx-K*NGLL2-J*NGLLX);
 
-  int active,offset,offset1,offset2,offset3;
+  int active,offset;
   int iglob = 0;
   int working_element;
   reald temp1l,temp2l,temp3l;
   reald xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl;
   reald dpotentialdxl,dpotentialdyl,dpotentialdzl;
-  reald fac1,fac2,fac3,rho_invl;
+  reald fac1,fac2,fac3;
+  reald rho_invl,kappa_invl;
+  reald sum_terms;
+  reald gravity_term;
 
 #ifndef MANUALLY_UNROLLED_LOOPS
-    int l;
-    realw hp1,hp2,hp3;
+  int l;
+  int offset1,offset2,offset3;
+  realw hp1,hp2,hp3;
 #endif
 
-    __shared__ reald s_dummy_loc[NGLL3];
+  __shared__ reald s_dummy_loc[NGLL3];
 
-    __shared__ reald s_temp1[NGLL3];
-    __shared__ reald s_temp2[NGLL3];
-    __shared__ reald s_temp3[NGLL3];
+  __shared__ reald s_temp1[NGLL3];
+  __shared__ reald s_temp2[NGLL3];
+  __shared__ reald s_temp3[NGLL3];
 
 // use only NGLL^3 = 125 active threads, plus 3 inactive/ghost threads,
 // because we used memory padding from NGLL^3 = 125 to 128 to get coalescent memory accesses
-    active = (tx < NGLL3 && bx < nb_blocks_to_compute) ? 1:0;
+  active = (tx < NGLL3 && bx < nb_blocks_to_compute) ? 1:0;
 
 // copy from global memory to shared memory
 // each thread writes one of the NGLL^3 = 125 data points
-    if (active) {
+  if (active) {
 
 #ifdef USE_MESH_COLORING_GPU
+    working_element = bx;
+#else
+    //mesh coloring
+    if( use_mesh_coloring_gpu ){
       working_element = bx;
-#else
-      //mesh coloring
-      if( use_mesh_coloring_gpu ){
-        working_element = bx;
-      }else{
-        // iphase-1 and working_element-1 for Fortran->C array conventions
-        working_element = d_phase_ispec_inner_acoustic[bx + num_phase_ispec_acoustic*(d_iphase-1)]-1;
-      }
+    }else{
+      // iphase-1 and working_element-1 for Fortran->C array conventions
+      working_element = d_phase_ispec_inner_acoustic[bx + num_phase_ispec_acoustic*(d_iphase-1)]-1;
+    }
 #endif
 
-      // iglob = d_ibool[working_element*NGLL3_ALIGN + tx]-1;
-      iglob = d_ibool[working_element*NGLL3 + tx]-1;
+    // iglob = d_ibool[working_element*NGLL3_ALIGN + tx]-1;
+    iglob = d_ibool[working_element*NGLL3 + tx]-1;
 
 #ifdef USE_TEXTURES
-      s_dummy_loc[tx] = tex1Dfetch(tex_potential_acoustic, iglob);
+    s_dummy_loc[tx] = tex1Dfetch(tex_potential_acoustic, iglob);
 #else
-  // changing iglob indexing to match fortran row changes fast style
-      s_dummy_loc[tx] = d_potential_acoustic[iglob];
+    // changing iglob indexing to match fortran row changes fast style
+    s_dummy_loc[tx] = d_potential_acoustic[iglob];
 #endif
-    }
+  }
 
 // synchronize all the threads (one thread for each of the NGLL grid points of the
 // current spectral element) because we need the whole element to be ready in order
 // to be able to compute the matrix products along cut planes of the 3D element below
-    __syncthreads();
+  __syncthreads();
 
 #ifndef MAKE_KERNEL2_BECOME_STUPID_FOR_TESTS
 
-    if (active) {
+  if (active) {
 
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
 //      if(iglob == 0 )printf("kernel 2: iglob %i  hprime_xx %f %f %f \n",iglob,hprime_xx[0],hprime_xx[1],hprime_xx[2]);
@@ -334,161 +342,177 @@
 
 #ifndef MANUALLY_UNROLLED_LOOPS
 
-        temp1l = 0.f;
-        temp2l = 0.f;
-        temp3l = 0.f;
+    temp1l = 0.f;
+    temp2l = 0.f;
+    temp3l = 0.f;
 
+    for (l=0;l<NGLLX;l++) {
+        hp1 = hprime_xx[l*NGLLX+I];
+        offset1 = K*NGLL2+J*NGLLX+l;
+        temp1l += s_dummy_loc[offset1]*hp1;
 
-        for (l=0;l<NGLLX;l++) {
-            hp1 = hprime_xx[l*NGLLX+I];
-            offset1 = K*NGLL2+J*NGLLX+l;
-            temp1l += s_dummy_loc[offset1]*hp1;
+        //no more assumes that hprime_xx = hprime_yy = hprime_zz
+        hp2 = hprime_yy[l*NGLLX+J];
+        offset2 = K*NGLL2+l*NGLLX+I;
+        temp2l += s_dummy_loc[offset2]*hp2;
 
-            //no more assumes that hprime_xx = hprime_yy = hprime_zz
-            hp2 = hprime_yy[l*NGLLX+J];
-            offset2 = K*NGLL2+l*NGLLX+I;
-            temp2l += s_dummy_loc[offset2]*hp2;
-
-            hp3 = hprime_zz[l*NGLLX+K];
-            offset3 = l*NGLL2+J*NGLLX+I;
-            temp3l += s_dummy_loc[offset3]*hp3;
-        }
+        hp3 = hprime_zz[l*NGLLX+K];
+        offset3 = l*NGLL2+J*NGLLX+I;
+        temp3l += s_dummy_loc[offset3]*hp3;
+    }
 #else
 
-        temp1l = s_dummy_loc[K*NGLL2+J*NGLLX]*hprime_xx[I]
-                + s_dummy_loc[K*NGLL2+J*NGLLX+1]*hprime_xx[NGLLX+I]
-                + s_dummy_loc[K*NGLL2+J*NGLLX+2]*hprime_xx[2*NGLLX+I]
-                + s_dummy_loc[K*NGLL2+J*NGLLX+3]*hprime_xx[3*NGLLX+I]
-                + s_dummy_loc[K*NGLL2+J*NGLLX+4]*hprime_xx[4*NGLLX+I];
+    temp1l = s_dummy_loc[K*NGLL2+J*NGLLX]*hprime_xx[I]
+            + s_dummy_loc[K*NGLL2+J*NGLLX+1]*hprime_xx[NGLLX+I]
+            + s_dummy_loc[K*NGLL2+J*NGLLX+2]*hprime_xx[2*NGLLX+I]
+            + s_dummy_loc[K*NGLL2+J*NGLLX+3]*hprime_xx[3*NGLLX+I]
+            + s_dummy_loc[K*NGLL2+J*NGLLX+4]*hprime_xx[4*NGLLX+I];
 
-        temp2l = s_dummy_loc[K*NGLL2+I]*hprime_yy[J]
-                + s_dummy_loc[K*NGLL2+NGLLX+I]*hprime_yy[NGLLX+J]
-                + s_dummy_loc[K*NGLL2+2*NGLLX+I]*hprime_yy[2*NGLLX+J]
-                + s_dummy_loc[K*NGLL2+3*NGLLX+I]*hprime_yy[3*NGLLX+J]
-                + s_dummy_loc[K*NGLL2+4*NGLLX+I]*hprime_yy[4*NGLLX+J];
+    temp2l = s_dummy_loc[K*NGLL2+I]*hprime_yy[J]
+            + s_dummy_loc[K*NGLL2+NGLLX+I]*hprime_yy[NGLLX+J]
+            + s_dummy_loc[K*NGLL2+2*NGLLX+I]*hprime_yy[2*NGLLX+J]
+            + s_dummy_loc[K*NGLL2+3*NGLLX+I]*hprime_yy[3*NGLLX+J]
+            + s_dummy_loc[K*NGLL2+4*NGLLX+I]*hprime_yy[4*NGLLX+J];
 
-        temp3l = s_dummy_loc[J*NGLLX+I]*hprime_zz[K]
-                + s_dummy_loc[NGLL2+J*NGLLX+I]*hprime_zz[NGLLX+K]
-                + s_dummy_loc[2*NGLL2+J*NGLLX+I]*hprime_zz[2*NGLLX+K]
-                + s_dummy_loc[3*NGLL2+J*NGLLX+I]*hprime_zz[3*NGLLX+K]
-                + s_dummy_loc[4*NGLL2+J*NGLLX+I]*hprime_zz[4*NGLLX+K];
+    temp3l = s_dummy_loc[J*NGLLX+I]*hprime_zz[K]
+            + s_dummy_loc[NGLL2+J*NGLLX+I]*hprime_zz[NGLLX+K]
+            + s_dummy_loc[2*NGLL2+J*NGLLX+I]*hprime_zz[2*NGLLX+K]
+            + s_dummy_loc[3*NGLL2+J*NGLLX+I]*hprime_zz[3*NGLLX+K]
+            + s_dummy_loc[4*NGLL2+J*NGLLX+I]*hprime_zz[4*NGLLX+K];
 
 #endif
 
-        // compute derivatives of ux, uy and uz with respect to x, y and z
-        offset = working_element*NGLL3_ALIGN + tx;
+    // compute derivatives of ux, uy and uz with respect to x, y and z
+    offset = working_element*NGLL3_ALIGN + tx;
 
-        xixl = d_xix[offset];
-        xiyl = d_xiy[offset];
-        xizl = d_xiz[offset];
-        etaxl = d_etax[offset];
-        etayl = d_etay[offset];
-        etazl = d_etaz[offset];
-        gammaxl = d_gammax[offset];
-        gammayl = d_gammay[offset];
-        gammazl = d_gammaz[offset];
+    xixl = d_xix[offset];
+    xiyl = d_xiy[offset];
+    xizl = d_xiz[offset];
+    etaxl = d_etax[offset];
+    etayl = d_etay[offset];
+    etazl = d_etaz[offset];
+    gammaxl = d_gammax[offset];
+    gammayl = d_gammay[offset];
+    gammazl = d_gammaz[offset];
 
-        jacobianl = 1.f / (xixl*(etayl*gammazl-etazl*gammayl)
-                          -xiyl*(etaxl*gammazl-etazl*gammaxl)
-                          +xizl*(etaxl*gammayl-etayl*gammaxl));
+    jacobianl = 1.f / (xixl*(etayl*gammazl-etazl*gammayl)
+                      -xiyl*(etaxl*gammazl-etazl*gammaxl)
+                      +xizl*(etaxl*gammayl-etayl*gammaxl));
 
-        // derivatives of potential
-        dpotentialdxl = xixl*temp1l + etaxl*temp2l + gammaxl*temp3l;
-        dpotentialdyl = xiyl*temp1l + etayl*temp2l + gammayl*temp3l;
-        dpotentialdzl = xizl*temp1l + etazl*temp2l + gammazl*temp3l;
+    // derivatives of potential
+    dpotentialdxl = xixl*temp1l + etaxl*temp2l + gammaxl*temp3l;
+    dpotentialdyl = xiyl*temp1l + etayl*temp2l + gammayl*temp3l;
+    dpotentialdzl = xizl*temp1l + etazl*temp2l + gammazl*temp3l;
 
-        // density (reciproc)
-        rho_invl = 1.f / d_rhostore[offset];
+    // pre-computes gravity sum term
+    if( gravity ){
+      // uses potential definition: s = grad(chi)
 
-        // form the dot product with the test vector
-        s_temp1[tx] = jacobianl * rho_invl * (dpotentialdxl*xixl + dpotentialdyl*xiyl + dpotentialdzl*xizl);
-        s_temp2[tx] = jacobianl * rho_invl * (dpotentialdxl*etaxl + dpotentialdyl*etayl + dpotentialdzl*etazl);
-        s_temp3[tx] = jacobianl * rho_invl * (dpotentialdxl*gammaxl + dpotentialdyl*gammayl + dpotentialdzl*gammazl);
+      // gravity term: 1/kappa grad(chi) * g
+      // assumes that g only acts in (negative) z-direction
+      kappa_invl = 1.f / d_kappastore[working_element*NGLL3 + tx];
+
+      iglob = d_ibool[working_element*NGLL3 + tx]-1;
+      gravity_term = minus_g[iglob] * kappa_invl * jacobianl * wgll_cube[tx] * dpotentialdzl;
     }
 
+    // density (reciproc)
+    rho_invl = 1.f / d_rhostore[offset];
+
+    // form the dot product with the test vector
+    s_temp1[tx] = jacobianl * rho_invl * (dpotentialdxl*xixl + dpotentialdyl*xiyl + dpotentialdzl*xizl);
+    s_temp2[tx] = jacobianl * rho_invl * (dpotentialdxl*etaxl + dpotentialdyl*etayl + dpotentialdzl*etazl);
+    s_temp3[tx] = jacobianl * rho_invl * (dpotentialdxl*gammaxl + dpotentialdyl*gammayl + dpotentialdzl*gammazl);
+  }
+
 // synchronize all the threads (one thread for each of the NGLL grid points of the
 // current spectral element) because we need the whole element to be ready in order
 // to be able to compute the matrix products along cut planes of the 3D element below
-    __syncthreads();
+  __syncthreads();
 
-    if (active) {
+  if (active) {
 
 #ifndef MANUALLY_UNROLLED_LOOPS
 
-        temp1l = 0.f;
-        temp2l = 0.f;
-        temp3l = 0.f;
+    temp1l = 0.f;
+    temp2l = 0.f;
+    temp3l = 0.f;
 
-        for (l=0;l<NGLLX;l++) {
-            fac1 = hprimewgll_xx[I*NGLLX+l];
-            offset1 = K*NGLL2+J*NGLLX+l;
-            temp1l += s_temp1[offset1]*fac1;
+    for (l=0;l<NGLLX;l++) {
+        fac1 = hprimewgll_xx[I*NGLLX+l];
+        offset1 = K*NGLL2+J*NGLLX+l;
+        temp1l += s_temp1[offset1]*fac1;
 
-            //no more assumes hprimewgll_xx = hprimewgll_yy = hprimewgll_zz
-            fac2 = hprimewgll_yy[J*NGLLX+l];
-            offset2 = K*NGLL2+l*NGLLX+I;
-            temp2l += s_temp2[offset2]*fac2;
+        //no more assumes hprimewgll_xx = hprimewgll_yy = hprimewgll_zz
+        fac2 = hprimewgll_yy[J*NGLLX+l];
+        offset2 = K*NGLL2+l*NGLLX+I;
+        temp2l += s_temp2[offset2]*fac2;
 
-            fac3 = hprimewgll_zz[K*NGLLX+l];
-            offset3 = l*NGLL2+J*NGLLX+I;
-            temp3l += s_temp3[offset3]*fac3;
-        }
+        fac3 = hprimewgll_zz[K*NGLLX+l];
+        offset3 = l*NGLL2+J*NGLLX+I;
+        temp3l += s_temp3[offset3]*fac3;
+    }
 #else
 
-        temp1l = s_temp1[K*NGLL2+J*NGLLX]*hprimewgll_xx[I*NGLLX]
-                + s_temp1[K*NGLL2+J*NGLLX+1]*hprimewgll_xx[I*NGLLX+1]
-                + s_temp1[K*NGLL2+J*NGLLX+2]*hprimewgll_xx[I*NGLLX+2]
-                + s_temp1[K*NGLL2+J*NGLLX+3]*hprimewgll_xx[I*NGLLX+3]
-                + s_temp1[K*NGLL2+J*NGLLX+4]*hprimewgll_xx[I*NGLLX+4];
+    temp1l = s_temp1[K*NGLL2+J*NGLLX]*hprimewgll_xx[I*NGLLX]
+            + s_temp1[K*NGLL2+J*NGLLX+1]*hprimewgll_xx[I*NGLLX+1]
+            + s_temp1[K*NGLL2+J*NGLLX+2]*hprimewgll_xx[I*NGLLX+2]
+            + s_temp1[K*NGLL2+J*NGLLX+3]*hprimewgll_xx[I*NGLLX+3]
+            + s_temp1[K*NGLL2+J*NGLLX+4]*hprimewgll_xx[I*NGLLX+4];
 
 
-        temp2l = s_temp2[K*NGLL2+I]*hprimewgll_yy[J*NGLLX]
-                + s_temp2[K*NGLL2+NGLLX+I]*hprimewgll_yy[J*NGLLX+1]
-                + s_temp2[K*NGLL2+2*NGLLX+I]*hprimewgll_yy[J*NGLLX+2]
-                + s_temp2[K*NGLL2+3*NGLLX+I]*hprimewgll_yy[J*NGLLX+3]
-                + s_temp2[K*NGLL2+4*NGLLX+I]*hprimewgll_yy[J*NGLLX+4];
+    temp2l = s_temp2[K*NGLL2+I]*hprimewgll_yy[J*NGLLX]
+            + s_temp2[K*NGLL2+NGLLX+I]*hprimewgll_yy[J*NGLLX+1]
+            + s_temp2[K*NGLL2+2*NGLLX+I]*hprimewgll_yy[J*NGLLX+2]
+            + s_temp2[K*NGLL2+3*NGLLX+I]*hprimewgll_yy[J*NGLLX+3]
+            + s_temp2[K*NGLL2+4*NGLLX+I]*hprimewgll_yy[J*NGLLX+4];
 
 
-        temp3l = s_temp3[J*NGLLX+I]*hprimewgll_zz[K*NGLLX]
-                + s_temp3[NGLL2+J*NGLLX+I]*hprimewgll_zz[K*NGLLX+1]
-                + s_temp3[2*NGLL2+J*NGLLX+I]*hprimewgll_zz[K*NGLLX+2]
-                + s_temp3[3*NGLL2+J*NGLLX+I]*hprimewgll_zz[K*NGLLX+3]
-                + s_temp3[4*NGLL2+J*NGLLX+I]*hprimewgll_zz[K*NGLLX+4];
+    temp3l = s_temp3[J*NGLLX+I]*hprimewgll_zz[K*NGLLX]
+            + s_temp3[NGLL2+J*NGLLX+I]*hprimewgll_zz[K*NGLLX+1]
+            + s_temp3[2*NGLL2+J*NGLLX+I]*hprimewgll_zz[K*NGLLX+2]
+            + s_temp3[3*NGLL2+J*NGLLX+I]*hprimewgll_zz[K*NGLLX+3]
+            + s_temp3[4*NGLL2+J*NGLLX+I]*hprimewgll_zz[K*NGLLX+4];
 
 
 #endif
 
-        fac1 = wgllwgll_yz[K*NGLLX+J];
-        fac2 = wgllwgll_xz[K*NGLLX+I];
-        fac3 = wgllwgll_xy[J*NGLLX+I];
+    fac1 = wgllwgll_yz[K*NGLLX+J];
+    fac2 = wgllwgll_xz[K*NGLLX+I];
+    fac3 = wgllwgll_xy[J*NGLLX+I];
 
+    sum_terms = -(fac1*temp1l + fac2*temp2l + fac3*temp3l);
+    if( gravity ) sum_terms += gravity_term;
+
+    iglob = d_ibool[working_element*NGLL3 + tx]-1;
+
 #ifdef USE_TEXTURES
-        d_potential_dot_dot_acoustic[iglob] = tex1Dfetch(tex_potential_dot_dot_acoustic, iglob)
-                                              - (fac1*temp1l + fac2*temp2l + fac3*temp3l);
+    d_potential_dot_dot_acoustic[iglob] = tex1Dfetch(tex_potential_dot_dot_acoustic, iglob)
+                                            + sum_terms;
 #else
 
 #ifdef USE_MESH_COLORING_GPU
-      // no atomic operation needed, colors don't share global points between elements
-      d_potential_dot_dot_acoustic[iglob] -= (fac1*temp1l + fac2*temp2l + fac3*temp3l);
+    // no atomic operation needed, colors don't share global points between elements
+    d_potential_dot_dot_acoustic[iglob] += sum_terms;
 #else
-      //mesh coloring
-      if( use_mesh_coloring_gpu ){
+    //mesh coloring
+    if( use_mesh_coloring_gpu ){
 
-        // no atomic operation needed, colors don't share global points between elements
-        d_potential_dot_dot_acoustic[iglob] -= (fac1*temp1l + fac2*temp2l + fac3*temp3l);
+      // no atomic operation needed, colors don't share global points between elements
+      d_potential_dot_dot_acoustic[iglob] += sum_terms;
 
-      }else{
+    }else{
 
-        atomicAdd(&d_potential_dot_dot_acoustic[iglob],-(fac1*temp1l + fac2*temp2l + fac3*temp3l));
+      atomicAdd(&d_potential_dot_dot_acoustic[iglob],sum_terms);
 
-      }
+    }
 #endif
 
 #endif
-    }
+  }
 
 #else  // of #ifndef MAKE_KERNEL2_BECOME_STUPID_FOR_TESTS
-        d_potential_dot_dot_acoustic[iglob] = 123.123f;
+  d_potential_dot_dot_acoustic[iglob] = 123.123f;
 #endif // of #ifndef MAKE_KERNEL2_BECOME_STUPID_FOR_TESTS
 }
 
@@ -507,7 +531,8 @@
                        realw* d_gammax,
                        realw* d_gammay,
                        realw* d_gammaz,
-                       realw* d_rhostore)
+                       realw* d_rhostore,
+                       realw* d_kappastore)
 {
 
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
@@ -550,7 +575,11 @@
                                                         mp->d_hprime_xx, mp->d_hprime_yy, mp->d_hprime_zz,
                                                         mp->d_hprimewgll_xx, mp->d_hprimewgll_yy, mp->d_hprimewgll_zz,
                                                         mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
-                                                        d_rhostore);
+                                                        d_rhostore,
+                                                        mp->gravity,
+                                                        mp->d_minus_g,
+                                                        d_kappastore,
+                                                        mp->d_wgll_cube);
 
   if(SIMULATION_TYPE == 3) {
     Kernel_2_acoustic_impl<<< grid_2, threads_2, 0, 0 >>>(nb_blocks_to_compute,
@@ -567,7 +596,11 @@
                                                           mp->d_hprime_xx, mp->d_hprime_yy, mp->d_hprime_zz,
                                                           mp->d_hprimewgll_xx, mp->d_hprimewgll_yy, mp->d_hprimewgll_zz,
                                                           mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
-                                                          d_rhostore);
+                                                          d_rhostore,
+                                                          mp->gravity,
+                                                          mp->d_minus_g,
+                                                          d_kappastore,
+                                                          mp->d_wgll_cube);
   }
 
   // cudaEventRecord( stop, 0 );
@@ -660,7 +693,8 @@
                          mp->d_gammax + color_offset,
                          mp->d_gammay + color_offset,
                          mp->d_gammaz + color_offset,
-                         mp->d_rhostore + color_offset);
+                         mp->d_rhostore + color_offset,
+                         mp->d_kappastore + color_offset_nonpadded);
 
       // for padded and aligned arrays
       color_offset += nb_blocks_to_compute * NGLL3_PADDED;
@@ -683,7 +717,8 @@
                       mp->d_gammax,
                       mp->d_gammay,
                       mp->d_gammaz,
-                      mp->d_rhostore);
+                      mp->d_rhostore,
+                      mp->d_kappastore);
 
   }
 }

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_elastic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_elastic_cuda.cu	2011-11-16 22:35:56 UTC (rev 19211)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_forces_elastic_cuda.cu	2011-11-17 01:58:40 UTC (rev 19212)
@@ -49,6 +49,7 @@
 __constant__ realw d_wgllwgll_xz[NGLL2];
 __constant__ realw d_wgllwgll_yz[NGLL2];
 
+__constant__ realw d_wgll_cube[NGLL3]; // needed only for gravity case
 
 
 /* ----------------------------------------------------------------------------------------------- */
@@ -56,7 +57,7 @@
 // prepares a device array with with all inter-element edge-nodes -- this
 // is followed by a memcpy and MPI operations
 __global__ void prepare_boundary_accel_on_device(realw* d_accel, realw* d_send_accel_buffer,
-                                                 int num_interfaces_ext_mesh, 
+                                                 int num_interfaces_ext_mesh,
                                                  int max_nibool_interfaces_ext_mesh,
                                                  int* d_nibool_interfaces_ext_mesh,
                                                  int* d_ibool_interfaces_ext_mesh) {
@@ -395,17 +396,102 @@
   return;
 }
 
+/* ----------------------------------------------------------------------------------------------- */
 
+// pre-computes gravity term
+
+__device__ void compute_element_gravity(int tx,int working_element,
+                                        int* d_ibool,
+                                        realw* d_minus_g,
+                                        realw* d_minus_deriv_gravity,
+                                        realw* d_rhostore,
+                                        realw* wgll_cube,
+                                        reald jacobianl,
+                                        reald* s_dummyx_loc,
+                                        reald* s_dummyy_loc,
+                                        reald* s_dummyz_loc,
+                                        reald* sigma_xx,
+                                        reald* sigma_yy,
+                                        reald* sigma_xz,
+                                        reald* sigma_yz,
+                                        reald* rho_s_H1,
+                                        reald* rho_s_H2,
+                                        reald* rho_s_H3){
+
+  int iglob;
+  reald minus_g,minus_dg;
+  reald rhol;
+  reald gzl; // gxl,gyl,
+  reald sx_l,sy_l,sz_l;
+  reald Hxxl,Hyyl,Hzzl; //,Hxyl,Hxzl,Hyzl;
+  reald factor;
+
+  // compute non-symmetric terms for gravity
+
+  // get g, rho and dg/dr=dg
+  iglob = d_ibool[working_element*NGLL3 + tx]-1;
+
+  minus_g = d_minus_g[iglob];
+  minus_dg = d_minus_deriv_gravity[iglob];
+
+  rhol = d_rhostore[working_element*NGLL3_PADDED + tx];
+
+  // Cartesian components of the gravitational acceleration
+  //gxl = 0.f;
+  //gyl = 0.f;
+  gzl = minus_g;
+
+  // Cartesian components of gradient of gravitational acceleration
+  // H = grad g
+  // assumes g only acts in negative z-direction
+  Hxxl = 0.f;
+  Hyyl = 0.f;
+  Hzzl = minus_dg;
+  //Hxyl = 0.f;
+  //Hxzl = 0.f;
+  //Hyzl = 0.f;
+
+  // get displacement and multiply by density to compute G tensor
+  // G = rho [ sg - (s * g) I  ]
+  sx_l = rhol * s_dummyx_loc[tx]; // d_displ[iglob*3];
+  sy_l = rhol * s_dummyy_loc[tx]; // d_displ[iglob*3 + 1];
+  sz_l = rhol * s_dummyz_loc[tx]; // d_displ[iglob*3 + 2];
+
+  // compute G tensor from s . g and add to sigma (not symmetric)
+  //sigma_xx += sy_l*gyl + sz_l*gzl;
+  *sigma_xx += sz_l*gzl;
+  //sigma_yy += sx_l*gxl + sz_l*gzl;
+  *sigma_yy += sz_l*gzl;
+  //sigma_zz += sx_l*gxl + sy_l*gyl;
+
+  //sigma_xy -= sx_l*gyl;
+  //sigma_yx -= sy_l*gxl;
+
+  *sigma_xz -= sx_l*gzl;
+  //sigma_zx -= sz_l*gxl;
+
+  *sigma_yz -= sy_l*gzl;
+  //sigma_zy -= sz_l*gyl;
+
+  // precompute vector
+  factor = jacobianl * wgll_cube[tx];
+
+  //rho_s_H1 = fac1 * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl);
+  //rho_s_H2 = fac1 * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl);
+  //rho_s_H3 = fac1 * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl);
+  // only non-zero z-direction
+  *rho_s_H1 = factor * sx_l * Hxxl ; // 0.f;
+  *rho_s_H2 = factor * sy_l * Hyyl ; // 0.f;
+  *rho_s_H3 = factor * sz_l * Hzzl ;
+
+}
+
 /* ----------------------------------------------------------------------------------------------- */
 
 // double precision temporary variables leads to 10% performance
 // decrease in Kernel_2_impl (not very much..)
 //typedef realw reald;
 
-// doesn't seem to change the performance.
-// #define MANUALLY_UNROLLED_LOOPS
-
-
 __global__ void Kernel_2_impl(int nb_blocks_to_compute,
                               int NGLOB,
                               int* d_ibool,
@@ -448,7 +534,12 @@
                               realw* d_c46store,
                               realw* d_c55store,
                               realw* d_c56store,
-                              realw* d_c66store){
+                              realw* d_c66store,
+                              int gravity,
+                              realw* d_minus_g,
+                              realw* d_minus_deriv_gravity,
+                              realw* d_rhostore,
+                              realw* wgll_cube){
 
   /* int bx = blockIdx.y*blockDim.x+blockIdx.x; //possible bug in original code*/
   int bx = blockIdx.y*gridDim.x+blockIdx.x;
@@ -477,7 +568,12 @@
   reald sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz;
   reald epsilondev_xx_loc,epsilondev_yy_loc,epsilondev_xy_loc,epsilondev_xz_loc,epsilondev_yz_loc;
   reald c11,c12,c13,c14,c15,c16,c22,c23,c24,c25,c26,c33,c34,c35,c36,c44,c45,c46,c55,c56,c66;
+  reald sum_terms1,sum_terms2,sum_terms3;
 
+  // gravity variables
+  reald sigma_yx,sigma_zx,sigma_zy;
+  reald rho_s_H1,rho_s_H2,rho_s_H3;
+
 #ifndef MANUALLY_UNROLLED_LOOPS
     int l;
     realw hp1,hp2,hp3;
@@ -658,6 +754,7 @@
       duzdyl = xiyl*tempz1l + etayl*tempz2l + gammayl*tempz3l;
       duzdzl = xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l;
 
+      // precompute some sums to save CPU time
       duxdxl_plus_duydyl = duxdxl + duydyl;
       duxdxl_plus_duzdzl = duxdxl + duzdzl;
       duydyl_plus_duzdzl = duydyl + duzdzl;
@@ -753,7 +850,7 @@
       }
 
       if(ATTENUATION){
-        // subtract memory variables if attenuation
+        // subtracts memory variables if attenuation
         compute_element_att_stress(tx,working_element,NSPEC,
                                    R_xx,R_yy,R_xy,R_xz,R_yz,
                                    &sigma_xx,&sigma_yy,&sigma_zz,&sigma_xy,&sigma_xz,&sigma_yz);
@@ -761,17 +858,31 @@
 
       jacobianl = 1.0f / (xixl*(etayl*gammazl-etazl*gammayl)-xiyl*(etaxl*gammazl-etazl*gammaxl)+xizl*(etaxl*gammayl-etayl*gammaxl));
 
-// form the dot product with the test vector
-      s_tempx1[tx] = jacobianl * (sigma_xx*xixl + sigma_xy*xiyl + sigma_xz*xizl);
-      s_tempy1[tx] = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_yz*xizl);
+      // define symmetric components (needed for non-symmetric dot product and sigma for gravity)
+      sigma_yx = sigma_xy;
+      sigma_zx = sigma_xz;
+      sigma_zy = sigma_yz;
+
+      if( gravity ){
+        //  computes non-symmetric terms for gravity
+        compute_element_gravity(tx,working_element,d_ibool,d_minus_g,d_minus_deriv_gravity,
+                                d_rhostore,wgll_cube,jacobianl,
+                                s_dummyx_loc,s_dummyy_loc,s_dummyz_loc,
+                                &sigma_xx,&sigma_yy,&sigma_xz,&sigma_yz,
+                                &rho_s_H1,&rho_s_H2,&rho_s_H3);
+      }
+
+      // form dot product with test vector, non-symmetric form
+      s_tempx1[tx] = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl);
+      s_tempy1[tx] = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl);
       s_tempz1[tx] = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl);
 
-      s_tempx2[tx] = jacobianl * (sigma_xx*etaxl + sigma_xy*etayl + sigma_xz*etazl);
-      s_tempy2[tx] = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_yz*etazl);
+      s_tempx2[tx] = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl);
+      s_tempy2[tx] = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl);
       s_tempz2[tx] = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl);
 
-      s_tempx3[tx] = jacobianl * (sigma_xx*gammaxl + sigma_xy*gammayl + sigma_xz*gammazl);
-      s_tempy3[tx] = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_yz*gammazl);
+      s_tempx3[tx] = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl);
+      s_tempy3[tx] = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl);
       s_tempz3[tx] = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl);
 
     }
@@ -880,34 +991,50 @@
       fac2 = d_wgllwgll_xz[K*NGLLX+I];
       fac3 = d_wgllwgll_xy[J*NGLLX+I];
 
+      sum_terms1 = - (fac1*tempx1l + fac2*tempx2l + fac3*tempx3l);
+      sum_terms2 = - (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l);
+      sum_terms3 = - (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
+
+      // adds gravity term
+      if( gravity ){
+        sum_terms1 += rho_s_H1;
+        sum_terms2 += rho_s_H2;
+        sum_terms3 += rho_s_H3;
+      }
+
 #ifdef USE_TEXTURES
-      d_accel[iglob] = tex1Dfetch(tex_accel, iglob) - (fac1*tempx1l + fac2*tempx2l + fac3*tempx3l);
-      d_accel[iglob + NGLOB] = tex1Dfetch(tex_accel, iglob + NGLOB) - (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l);
-      d_accel[iglob + 2*NGLOB] = tex1Dfetch(tex_accel, iglob + 2*NGLOB) - (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
+      d_accel[iglob] = tex1Dfetch(tex_accel, iglob) + sum_terms1);
+      d_accel[iglob + NGLOB] = tex1Dfetch(tex_accel, iglob + NGLOB) + sum_terms2);
+      d_accel[iglob + 2*NGLOB] = tex1Dfetch(tex_accel, iglob + 2*NGLOB) + sum_terms3);
 #else
   /* OLD/To be implemented version that uses coloring to get around race condition. About 1.6x faster */
 
 
 #ifdef USE_MESH_COLORING_GPU
       // no atomic operation needed, colors don't share global points between elements
-      d_accel[iglob*3] -= (fac1*tempx1l + fac2*tempx2l + fac3*tempx3l);
-      d_accel[iglob*3 + 1] -= (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l);
-      d_accel[iglob*3 + 2] -= (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
+      d_accel[iglob*3]     += sum_terms1;
+      d_accel[iglob*3 + 1] += sum_terms2;
+      d_accel[iglob*3 + 2] += sum_terms3;
 #else
       //mesh coloring
       if( use_mesh_coloring_gpu ){
 
        // no atomic operation needed, colors don't share global points between elements
-        d_accel[iglob*3] -= (fac1*tempx1l + fac2*tempx2l + fac3*tempx3l);
-        d_accel[iglob*3 + 1] -= (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l);
-        d_accel[iglob*3 + 2] -= (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l);
+        d_accel[iglob*3]     += sum_terms1;
+        d_accel[iglob*3 + 1] += sum_terms2;
+        d_accel[iglob*3 + 2] += sum_terms3;
 
-
       }else{
 
-        atomicAdd(&d_accel[iglob*3],-(fac1*tempx1l + fac2*tempx2l + fac3*tempx3l));
-        atomicAdd(&d_accel[iglob*3+1],-(fac1*tempy1l + fac2*tempy2l + fac3*tempy3l));
-        atomicAdd(&d_accel[iglob*3+2],-(fac1*tempz1l + fac2*tempz2l + fac3*tempz3l));
+        // for testing purposes only: w/out atomic updates
+        //d_accel[iglob*3] -= (0.00000001f*tempx1l + 0.00000001f*tempx2l + 0.00000001f*tempx3l);
+        //d_accel[iglob*3 + 1] -= (0.00000001f*tempy1l + 0.00000001f*tempy2l + 0.00000001f*tempy3l);
+        //d_accel[iglob*3 + 2] -= (0.00000001f*tempz1l + 0.00000001f*tempz2l + 0.00000001f*tempz3l);
+
+        atomicAdd(&d_accel[iglob*3], sum_terms1);
+        atomicAdd(&d_accel[iglob*3+1], sum_terms2);
+        atomicAdd(&d_accel[iglob*3+2], sum_terms3);
+
       }
 #endif
 
@@ -1005,7 +1132,8 @@
               realw* d_c46store,
               realw* d_c55store,
               realw* d_c56store,
-              realw* d_c66store){
+              realw* d_c66store,
+              realw* d_rhostore){
 
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   exit_on_cuda_error("before kernel Kernel 2");
@@ -1079,8 +1207,12 @@
                                   d_c46store,
                                   d_c55store,
                                   d_c56store,
-                                  d_c66store
-                                  );
+                                  d_c66store,
+                                  mp->gravity,
+                                  mp->d_minus_g,
+                                  mp->d_minus_deriv_gravity,
+                                  d_rhostore,
+                                  mp->d_wgll_cube);
 
 
   if(SIMULATION_TYPE == 3) {
@@ -1130,8 +1262,12 @@
                                      d_c46store,
                                      d_c55store,
                                      d_c56store,
-                                     d_c66store
-                                     );
+                                     d_c66store,
+                                     mp->gravity,
+                                     mp->d_minus_g,
+                                     mp->d_minus_deriv_gravity,
+                                     d_rhostore,
+                                     mp->d_wgll_cube);
   }
 
   // cudaEventRecord( stop, 0 );
@@ -1281,7 +1417,8 @@
                mp->d_c46store + color_offset,
                mp->d_c55store + color_offset,
                mp->d_c56store + color_offset,
-               mp->d_c66store + color_offset);
+               mp->d_c66store + color_offset,
+               mp->d_rhostore + color_offset);
 
       // for padded and aligned arrays
       color_offset += nb_blocks_to_compute * NGLL3_PADDED;
@@ -1354,7 +1491,8 @@
              mp->d_c46store,
              mp->d_c55store,
              mp->d_c56store,
-             mp->d_c66store);
+             mp->d_c66store,
+             mp->d_rhostore);
   }
 }
 
@@ -1537,41 +1675,41 @@
   int iface = blockIdx.x + gridDim.x*blockIdx.y;
   realw nx,ny,nz;
   realw force_normal_comp,additional_term;
-  
+
   // for all faces on free surface
   if( iface < num_free_surface_faces ){
-    
+
     int ispec = free_surface_ispec[iface]-1;
-    
+
     // gets global point index
     int i = free_surface_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)] - 1; // (1,igll,iface)
     int j = free_surface_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)] - 1;
     int k = free_surface_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)] - 1;
-    
+
     int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)] - 1;
-    
+
     //if(igll == 0 ) printf("igll %d %d %d %d\n",igll,i,j,k,iglob);
-    
+
     // only update this global point once
-    
+
     // daniel: TODO - there might be better ways to implement a mutex like below,
     //            and find a workaround to not use the temporary update array.
     //            atomicExch: returns the old value, i.e. 0 indicates that we still have to do this point
-    
+
     if( atomicExch(&updated_dof_ocean_load[iglob],1) == 0){
-      
+
       // get normal
       nx = free_surface_normal[INDEX3(NDIM,NGLL2,0,igll,iface)]; //(1,igll,iface)
       ny = free_surface_normal[INDEX3(NDIM,NGLL2,1,igll,iface)];
       nz = free_surface_normal[INDEX3(NDIM,NGLL2,2,igll,iface)];
-      
+
       // make updated component of right-hand side
       // we divide by rmass() which is 1 / M
       // we use the total force which includes the Coriolis term above
       force_normal_comp = ( accel[iglob*3]*nx + accel[iglob*3+1]*ny + accel[iglob*3+2]*nz ) / rmass[iglob];
-      
+
       additional_term = (rmass_ocean_load[iglob] - rmass[iglob]) * force_normal_comp;
-      
+
       // probably wouldn't need atomicAdd anymore, but just to be sure...
       atomicAdd(&accel[iglob*3], + additional_term * nx);
       atomicAdd(&accel[iglob*3+1], + additional_term * ny);
@@ -1586,36 +1724,36 @@
 void FC_FUNC_(elastic_ocean_load_cuda,
               ELASTIC_OCEAN_LOAD_CUDA)(long* Mesh_pointer_f,
                                        int* SIMULATION_TYPE) {
-  
+
   TRACE("elastic_ocean_load_cuda");
-  
+
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-  
+
   // checks if anything to do
   if( mp->num_free_surface_faces == 0 ) return;
-  
+
   // block sizes: exact blocksize to match NGLLSQUARE
   int blocksize = NGLL2;
-  
+
   int num_blocks_x = mp->num_free_surface_faces;
   int num_blocks_y = 1;
   while(num_blocks_x > 65535) {
     num_blocks_x = ceil(num_blocks_x/2.0);
     num_blocks_y = num_blocks_y*2;
   }
-  
+
   dim3 grid(num_blocks_x,num_blocks_y);
   dim3 threads(blocksize,1,1);
-  
-  
+
+
   // initializes temporary array to zero
   print_CUDA_error_if_any(cudaMemset(mp->d_updated_dof_ocean_load,0,
                                      sizeof(int)*mp->NGLOB_AB),88501);
-  
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   exit_on_cuda_error("before kernel elastic_ocean_load_cuda");
 #endif
-  
+
   elastic_ocean_load_cuda_kernel<<<grid,threads>>>(mp->d_accel,
                                                    mp->d_rmass,
                                                    mp->d_rmass_ocean_load,
@@ -1630,7 +1768,7 @@
     // re-initializes array
     print_CUDA_error_if_any(cudaMemset(mp->d_updated_dof_ocean_load,0,
                                        sizeof(int)*mp->NGLOB_AB),88502);
-    
+
     elastic_ocean_load_cuda_kernel<<<grid,threads>>>(mp->d_b_accel,
                                                      mp->d_rmass,
                                                      mp->d_rmass_ocean_load,
@@ -1640,10 +1778,10 @@
                                                      mp->d_free_surface_normal,
                                                      mp->d_ibool,
                                                      mp->d_updated_dof_ocean_load);
-    
+
   }
-  
-  
+
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   exit_on_cuda_error("elastic_ocean_load_cuda");
 #endif
@@ -1829,3 +1967,20 @@
 
 }
 
+void setConst_wgll_cube(realw* array,Mesh* mp)
+{
+  cudaError_t err = cudaMemcpyToSymbol(d_wgll_cube, array, NGLL3*sizeof(realw));
+  if (err != cudaSuccess)
+  {
+    fprintf(stderr, "Error in setConst_wgll_cube: %s\n", cudaGetErrorString(err));
+    exit(1);
+  }
+  //mp->d_wgll_cube = d_wgll_cube;
+  err = cudaGetSymbolAddress((void**)&(mp->d_wgll_cube),"d_wgll_cube");
+  if(err != cudaSuccess) {
+    fprintf(stderr, "Error with d_wgll_cube: %s\n", cudaGetErrorString(err));
+    exit(1);
+  }
+
+}
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_kernels_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_kernels_cuda.cu	2011-11-16 22:35:56 UTC (rev 19211)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_kernels_cuda.cu	2011-11-17 01:58:40 UTC (rev 19212)
@@ -261,7 +261,8 @@
                                         realw* d_gammax,
                                         realw* d_gammay,
                                         realw* d_gammaz,
-                                        realw rhol) {
+                                        realw rhol,
+                                        int gravity) {
 
   realw temp1l,temp2l,temp3l;
   realw hp1,hp2,hp3;
@@ -313,8 +314,11 @@
   gammayl = d_gammay[offset];
   gammazl = d_gammaz[offset];
 
-  rho_invl = 1.0f / rhol;
-
+  if( gravity ){
+    rho_invl = 1.0f;
+  }else{
+    rho_invl = 1.0f / rhol;
+  }
   // derivatives of acoustic scalar potential field on GLL points
   vector_field_element[0] = (temp1l*xixl + temp2l*etaxl + temp3l*gammaxl) * rho_invl;
   vector_field_element[1] = (temp1l*xiyl + temp2l*etayl + temp3l*gammayl) * rho_invl;
@@ -347,7 +351,8 @@
                                                 realw* rho_ac_kl,
                                                 realw* kappa_ac_kl,
                                                 realw deltat,
-                                                int NSPEC_AB) {
+                                                int NSPEC_AB,
+                                                int gravity) {
 
   int ispec = blockIdx.x + blockIdx.y*gridDim.x;
 
@@ -384,13 +389,13 @@
       compute_gradient_kernel(ijk,ispec,scalar_field_displ,b_displ_elm,
                               hprime_xx,hprime_yy,hprime_zz,
                               d_xix,d_xiy,d_xiz,d_etax,d_etay,d_etaz,d_gammax,d_gammay,d_gammaz,
-                              rhol);
+                              rhol,gravity);
 
       // acceleration vector
       compute_gradient_kernel(ijk,ispec,scalar_field_accel,accel_elm,
                               hprime_xx,hprime_yy,hprime_zz,
                               d_xix,d_xiy,d_xiz,d_etax,d_etay,d_etaz,d_gammax,d_gammay,d_gammaz,
-                              rhol);
+                              rhol,gravity);
 
       // density kernel
       rho_ac_kl[ijk_ispec] -= deltat * rhol * (accel_elm[0]*b_displ_elm[0] +
@@ -453,7 +458,8 @@
                                                     mp->d_rho_ac_kl,
                                                     mp->d_kappa_ac_kl,
                                                     deltat,
-                                                    mp->NSPEC_AB);
+                                                    mp->NSPEC_AB,
+                                                    mp->gravity);
 
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   exit_on_cuda_error("compute_kernels_acoustic_kernel");
@@ -515,7 +521,8 @@
                                                    realw* d_gammaz,
                                                    realw* hess_kl,
                                                    realw deltat,
-                                                   int NSPEC_AB) {
+                                                   int NSPEC_AB,
+                                                   int gravity) {
 
   int ispec = blockIdx.x + blockIdx.y*gridDim.x;
 
@@ -553,14 +560,14 @@
                               scalar_field_accel,accel_elm,
                               hprime_xx,hprime_yy,hprime_zz,
                               d_xix,d_xiy,d_xiz,d_etax,d_etay,d_etaz,d_gammax,d_gammay,d_gammaz,
-                              rhol);
+                              rhol,gravity);
 
       // acceleration vector from backward field
       compute_gradient_kernel(ijk,ispec,
                               scalar_field_b_accel,b_accel_elm,
                               hprime_xx,hprime_yy,hprime_zz,
                               d_xix,d_xiy,d_xiz,d_etax,d_etay,d_etaz,d_gammax,d_gammay,d_gammaz,
-                              rhol);
+                              rhol,gravity);
       // approximates hessian
       hess_kl[ijk_ispec] += deltat * (accel_elm[0]*b_accel_elm[0] +
                                       accel_elm[1]*b_accel_elm[1] +
@@ -626,7 +633,8 @@
                                                          mp->d_gammaz,
                                                          mp->d_hess_ac_kl,
                                                          deltat,
-                                                         mp->NSPEC_AB);
+                                                         mp->NSPEC_AB,
+                                                         mp->gravity);
   }
 
 

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu	2011-11-16 22:35:56 UTC (rev 19211)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/compute_stacey_acoustic_cuda.cu	2011-11-17 01:58:40 UTC (rev 19212)
@@ -54,8 +54,8 @@
                                                int num_abs_boundary_faces,
                                                realw* b_potential_dot_acoustic,
                                                realw* b_potential_dot_dot_acoustic,
-                                               realw* b_absorb_potential
-                                               ) {
+                                               realw* b_absorb_potential,
+                                               int gravity) {
 
   int igll = threadIdx.x;
   int iface = blockIdx.x + gridDim.x*blockIdx.y;
@@ -63,8 +63,8 @@
   int i,j,k,iglob,ispec;
   realw rhol,kappal,cpl;
   realw jacobianw;
+  realw vel;
 
-
   // don't compute points outside NGLLSQUARE==NGLL2==25
   // way 2: no further check needed since blocksize = 25
   if( iface < num_abs_boundary_faces){
@@ -88,18 +88,29 @@
 
       cpl = sqrt( kappal / rhol );
 
+      // velocity
+      if( gravity ){
+        // daniel: TODO - check stacey here...
+        // uses a potential definition of: s = grad(chi)
+        vel = potential_dot_acoustic[iglob] / rhol ;
+      }else{
+        // uses a potential definition of: s = 1/rho grad(chi)
+        vel = potential_dot_acoustic[iglob] / rhol;
+      }
+
       // gets associated, weighted jacobian
       jacobianw = abs_boundary_jacobian2Dw[INDEX2(NGLL2,igll,iface)];
 
       // Sommerfeld condition
-      atomicAdd(&potential_dot_dot_acoustic[iglob],-potential_dot_acoustic[iglob]*jacobianw/cpl/rhol);
+      atomicAdd(&potential_dot_dot_acoustic[iglob],-vel*jacobianw/cpl);
 
       // adjoint simulations
       if( SIMULATION_TYPE == 3 ){
         // Sommerfeld condition
         atomicAdd(&b_potential_dot_dot_acoustic[iglob],-b_absorb_potential[INDEX2(NGLL2,igll,iface)]);
       }else if( SIMULATION_TYPE == 1 && SAVE_FORWARD ){
-         b_absorb_potential[INDEX2(NGLL2,igll,iface)] = potential_dot_acoustic[iglob]*jacobianw/cpl/rhol;
+        // saves boundary values
+        b_absorb_potential[INDEX2(NGLL2,igll,iface)] = vel*jacobianw/cpl;
       }
     }
 //  }
@@ -164,7 +175,8 @@
                                                    mp->d_num_abs_boundary_faces,
                                                    mp->d_b_potential_dot_acoustic,
                                                    mp->d_b_potential_dot_dot_acoustic,
-                                                   mp->d_b_absorb_potential);
+                                                   mp->d_b_absorb_potential,
+                                                   mp->gravity);
 
   //  adjoint simulations: stores absorbed wavefield part
   if (SIMULATION_TYPE == 1 && SAVE_FORWARD && mp->d_num_abs_boundary_faces > 0 ){

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/mesh_constants_cuda.h
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/mesh_constants_cuda.h	2011-11-16 22:35:56 UTC (rev 19211)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/mesh_constants_cuda.h	2011-11-17 01:58:40 UTC (rev 19212)
@@ -28,16 +28,14 @@
 
 /* daniel: trivia
 
-- for most real working arrays we use float (single exception so far is stf_pre_compute).
-  TODO: we could use "realw" instead of "float" type declaration to make it easier to switch
-              between a real or double precision simulation (matching CUSTOM_REAL == 4 or 8 in fortran routines).
+- for most working arrays we use now "realw" instead of "float" type declarations to make it easier to switch
+  between a real or double precision simulation
+  (matching CUSTOM_REAL == 4 or 8 in fortran routines).
 
 - instead of boolean "logical" declared in fortran routines, in C (or Cuda-C) we have to use "int" variables.
-
   ifort / gfortran caveat:
     to check whether it is true or false, do not check for == 1 to test for true values since ifort just uses
     non-zero values for true (e.g. can be -1 for true). however, false will be always == 0.
-
   thus, rather use: if( var ) {...}  for testing if true instead of if( var == 1){...} (alternative: one could use if( var != 0 ){...}
 
 */
@@ -96,7 +94,7 @@
 // dimensions
 #define NDIM 3
 
-// Gauss-Lobatto-Legendre 
+// Gauss-Lobatto-Legendre
 #define NGLLX 5
 #define NGLL2 25
 #define NGLL3 125 // no padding: requires same size as in fortran for NGLLX * NGLLY * NGLLZ
@@ -120,6 +118,10 @@
 // leads up to ~ 5% performance increase
 //#define USE_MESH_COLORING_GPU
 
+// (optional) unrolling loops
+// leads up to ~1% performance increase
+//#define MANUALLY_UNROLLED_LOOPS
+
 // cuda kernel block size for updating displacements/potential (newmark time scheme)
 #define BLOCKSIZE_KERNEL1 128
 #define BLOCKSIZE_KERNEL3 128
@@ -178,11 +180,12 @@
   realw* d_hprime_xx; realw* d_hprime_yy; realw* d_hprime_zz;
   realw* d_hprimewgll_xx; realw* d_hprimewgll_yy; realw* d_hprimewgll_zz;
   realw* d_wgllwgll_xy; realw* d_wgllwgll_xz; realw* d_wgllwgll_yz;
+  realw* d_wgll_cube;
 
   // mpi buffers
   int num_interfaces_ext_mesh;
   int max_nibool_interfaces_ext_mesh;
-  
+
   // ------------------------------------------------------------------ //
   // elastic wavefield parameters
   // ------------------------------------------------------------------ //
@@ -205,7 +208,7 @@
   int nspec_elastic;
 
   realw* d_rmass;
-  
+
   // mpi buffer
   realw* d_send_accel_buffer;
 
@@ -365,7 +368,7 @@
   realw* d_rhostore;
   realw* d_kappastore;
   realw* d_rmass_acoustic;
-  
+
   // mpi buffer
   realw* d_send_potential_dot_dot_buffer;
 
@@ -389,6 +392,11 @@
   realw* d_coupling_ac_el_normal;
   realw* d_coupling_ac_el_jacobian2Dw;
 
+  // gravity
+  int gravity;
+  realw* d_minus_deriv_gravity;
+  realw* d_minus_g;
+
 } Mesh;
 
 

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_constants_cuda.h
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_constants_cuda.h	2011-11-16 22:35:56 UTC (rev 19211)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_constants_cuda.h	2011-11-17 01:58:40 UTC (rev 19212)
@@ -48,6 +48,8 @@
 void setConst_wgllwgll_xz(realw* array, Mesh* mp);
 void setConst_wgllwgll_yz(realw* array, Mesh* mp);
 
+void setConst_wgll_cube(realw* array, Mesh* mp);
+
 /* ----------------------------------------------------------------------------------------------- */
 
 /* CUDA specific things from specfem3D_kernels.cu */

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu	2011-11-16 22:35:56 UTC (rev 19211)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu	2011-11-17 01:58:40 UTC (rev 19212)
@@ -702,7 +702,7 @@
   if( mp->nadj_rec_local > 0 ){
     print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_adj_sourcearrays,
                                        (mp->nadj_rec_local)*3*NGLL3*sizeof(realw)),7003);
-                                       
+
     print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_pre_computed_irec,
                                        (mp->nadj_rec_local)*sizeof(int)),7004);
 
@@ -783,7 +783,7 @@
   print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_potential_dot_dot_acoustic),sizeof(realw)*size_glob),9003);
 
   // mpi buffer
-  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_send_potential_dot_dot_buffer), 
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_send_potential_dot_dot_buffer),
                       (mp->max_nibool_interfaces_ext_mesh)*(mp->num_interfaces_ext_mesh)*sizeof(realw)),9004);
 
   print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass_acoustic),sizeof(realw)*size_glob),9005);
@@ -1526,9 +1526,58 @@
 #endif
 }
 
+/* ----------------------------------------------------------------------------------------------- */
 
+// GRAVITY simulations
+
 /* ----------------------------------------------------------------------------------------------- */
 
+extern "C"
+void FC_FUNC_(prepare_fields_gravity_device,
+              PREPARE_FIELDS_gravity_DEVICE)(long* Mesh_pointer_f,
+                                             int* GRAVITY,
+                                             realw* minus_deriv_gravity,
+                                             realw* minus_g,
+                                             realw* h_wgll_cube,
+                                             int* ACOUSTIC_SIMULATION,
+                                             realw* rhostore) {
+
+  TRACE("prepare_fields_gravity_device");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+
+  setConst_wgll_cube(h_wgll_cube,mp);
+
+  mp->gravity = *GRAVITY;
+  if( mp->gravity ){
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_minus_deriv_gravity),
+                                       (mp->NGLOB_AB)*sizeof(realw)),7700);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_minus_deriv_gravity, minus_deriv_gravity,
+                                       (mp->NGLOB_AB)*sizeof(realw),cudaMemcpyHostToDevice),7701);
+
+    print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_minus_g),
+                                       (mp->NGLOB_AB)*sizeof(realw)),7702);
+    print_CUDA_error_if_any(cudaMemcpy(mp->d_minus_g, minus_g,
+                                       (mp->NGLOB_AB)*sizeof(realw),cudaMemcpyHostToDevice),7703);
+
+
+    if( *ACOUSTIC_SIMULATION == 0 ){
+      // rhostore not allocated yet
+      int size_padded = NGLL3_PADDED * (mp->NSPEC_AB);
+      // padded array
+      print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rhostore),size_padded*sizeof(realw)),7706);
+      // transfer constant element data with padding
+      for(int i=0; i < mp->NSPEC_AB; i++) {
+        print_CUDA_error_if_any(cudaMemcpy(mp->d_rhostore+i*NGLL3_PADDED, &rhostore[i*NGLL3],
+                                           NGLL3*sizeof(realw),cudaMemcpyHostToDevice),7707);
+      }
+    }
+  }
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
 // cleanup
 
 /* ----------------------------------------------------------------------------------------------- */

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c	2011-11-16 22:35:56 UTC (rev 19211)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c	2011-11-17 01:58:40 UTC (rev 19212)
@@ -501,6 +501,14 @@
                                       int* OCEANS,
                                       int* APPROXIMATE_HESS_KL){}
 
+void FC_FUNC_(prepare_fields_gravity_device,
+              PREPARE_FIELDS_gravity_DEVICE)(long* Mesh_pointer_f,
+                                             int* GRAVITY,
+                                             realw* minus_deriv_gravity, 
+                                             realw* minus_g,
+                                             realw* h_wgll_cube,
+                                             int* ACOUSTIC_SIMULATION,
+                                             realw* rhostore){}
 /* from file transfer_fields_cuda.cu				      */
 
 void FC_FUNC_(transfer_fields_el_to_device,

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/create_regions_mesh.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/create_regions_mesh.f90	2011-11-16 22:35:56 UTC (rev 19211)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/create_regions_mesh.f90	2011-11-17 01:58:40 UTC (rev 19212)
@@ -1843,10 +1843,11 @@
     stop 'error temporary global permutation incomplete'
   endif
 
+  ! checks perm entries
   if(minval(temp_perm_global) /= 1) call exit_MPI(myrank, 'minval(temp_perm_global) should be 1')
   if(maxval(temp_perm_global) /= nspec) call exit_MPI(myrank, 'maxval(temp_perm_global) should be nspec')
 
-  ! checks if every element was set
+  ! checks if every element was uniquely set
   allocate(mask_global(nspec),stat=ier)
   if( ier /= 0 ) stop 'error allocating temporary mask_global'
   mask_global(:) = .false.

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_coupling_surfaces.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_coupling_surfaces.f90	2011-11-16 22:35:56 UTC (rev 19211)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_coupling_surfaces.f90	2011-11-17 01:58:40 UTC (rev 19212)
@@ -65,7 +65,7 @@
   integer :: count_elastic,count_acoustic
 
   ! mpi interface communication
-  integer, dimension(:), allocatable :: elastic_flag,acoustic_flag,test_flag
+  integer, dimension(:), allocatable :: elastic_flag,acoustic_flag !,test_flag
   integer, dimension(:,:), allocatable :: ibool_interfaces_ext_mesh_dummy
   integer :: max_nibool_interfaces_ext_mesh
   logical, dimension(:), allocatable :: mask_ibool
@@ -115,13 +115,13 @@
   if( ier /= 0 ) stop 'error allocating array elastic_flag'
   allocate(acoustic_flag(nglob),stat=ier)
   if( ier /= 0 ) stop 'error allocating array acoustic_flag'
-  allocate(test_flag(nglob),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array test_flag'
+  !allocate(test_flag(nglob),stat=ier)
+  !if( ier /= 0 ) stop 'error allocating array test_flag'
   allocate(mask_ibool(nglob),stat=ier)
   if( ier /= 0 ) stop 'error allocating array mask_ibool'
   elastic_flag(:) = 0
   acoustic_flag(:) = 0
-  test_flag(:) = 0
+  !test_flag(:) = 0
   count_elastic = 0
   count_acoustic = 0
   do ispec = 1, nspec
@@ -140,7 +140,7 @@
           ! sets acoustic flag
           if( ispec_is_acoustic(ispec) ) acoustic_flag(iglob) =  myrank+1
           ! sets test flag
-          test_flag(iglob) = myrank+1
+          !test_flag(iglob) = myrank+1
         enddo
       enddo
     enddo
@@ -174,120 +174,137 @@
                         my_neighbours_ext_mesh)
 
   ! sums test flags
-  call assemble_MPI_scalar_i_ext_mesh(NPROC,nglob,test_flag, &
-                        num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
-                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh_dummy,&
-                        my_neighbours_ext_mesh)
+  !call assemble_MPI_scalar_i_ext_mesh(NPROC,nglob,test_flag, &
+  !                      num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+  !                      nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh_dummy,&
+  !                      my_neighbours_ext_mesh)
 
   ! loops over all element faces and
   ! counts number of coupling faces between acoustic and elastic elements
   mask_ibool(:) = .false.
   inum = 0
+
+  ! coupling surfaces: takes point of view from acoustic elements, i.e. if element is acoustic
+  !                               and has an elastic neighbor, then this acoustic element is added to
+  !                               the coupling elements and its corresponding coupling surface
+  !                               ( no matter in which MPI partition it is)
+  ! note: we use acoustic elements as reference elements because we will need
+  !          density from acoustic element when coupling pressure in case of gravity
   do ispec=1,nspec
+    if( ispec_is_acoustic(ispec) ) then
 
-    ! loops over each face
-    do iface_ref= 1, 6
+      ! loops over each face
+      do iface_ref= 1, 6
 
-      ! takes indices of corners of reference face
-      do icorner = 1,NGNOD2D
-        i = iface_all_corner_ijk(1,icorner,iface_ref)
-        j = iface_all_corner_ijk(2,icorner,iface_ref)
-        k = iface_all_corner_ijk(3,icorner,iface_ref)
-        ! global reference indices
-        iglob_corners_ref(icorner) = ibool(i,j,k,ispec)
+        ! takes indices of corners of reference face
+        do icorner = 1,NGNOD2D
+          i = iface_all_corner_ijk(1,icorner,iface_ref)
+          j = iface_all_corner_ijk(2,icorner,iface_ref)
+          k = iface_all_corner_ijk(3,icorner,iface_ref)
+          ! global reference indices
+          iglob_corners_ref(icorner) = ibool(i,j,k,ispec)
 
-        ! reference corner coordinates
-        xcoord(icorner) = xstore_dummy(iglob_corners_ref(icorner))
-        ycoord(icorner) = ystore_dummy(iglob_corners_ref(icorner))
-        zcoord(icorner) = zstore_dummy(iglob_corners_ref(icorner))
-      enddo
+          ! reference corner coordinates
+          xcoord(icorner) = xstore_dummy(iglob_corners_ref(icorner))
+          ycoord(icorner) = ystore_dummy(iglob_corners_ref(icorner))
+          zcoord(icorner) = zstore_dummy(iglob_corners_ref(icorner))
+        enddo
 
-      ! checks if face has acoustic side
-      if( acoustic_flag( iglob_corners_ref(1) ) >= 1 .and. &
-         acoustic_flag( iglob_corners_ref(2) ) >= 1 .and. &
-         acoustic_flag( iglob_corners_ref(3) ) >= 1 .and. &
-         acoustic_flag( iglob_corners_ref(4) ) >= 1) then
-        ! checks if face is has an elastic side
-        if( elastic_flag( iglob_corners_ref(1) ) >= 1 .and. &
-           elastic_flag( iglob_corners_ref(2) ) >= 1 .and. &
-           elastic_flag( iglob_corners_ref(3) ) >= 1 .and. &
-           elastic_flag( iglob_corners_ref(4) ) >= 1) then
+        ! checks if face has acoustic side
+        if( acoustic_flag( iglob_corners_ref(1) ) >= 1 .and. &
+           acoustic_flag( iglob_corners_ref(2) ) >= 1 .and. &
+           acoustic_flag( iglob_corners_ref(3) ) >= 1 .and. &
+           acoustic_flag( iglob_corners_ref(4) ) >= 1) then
+          ! checks if face is has an elastic side
+          if( elastic_flag( iglob_corners_ref(1) ) >= 1 .and. &
+             elastic_flag( iglob_corners_ref(2) ) >= 1 .and. &
+             elastic_flag( iglob_corners_ref(3) ) >= 1 .and. &
+             elastic_flag( iglob_corners_ref(4) ) >= 1) then
 
-          ! reference midpoint on face (used to avoid redundant face counting)
-          i = iface_all_midpointijk(1,iface_ref)
-          j = iface_all_midpointijk(2,iface_ref)
-          k = iface_all_midpointijk(3,iface_ref)
-          iglob_midpoint = ibool(i,j,k,ispec)
+            ! reference midpoint on face (used to avoid redundant face counting)
+            i = iface_all_midpointijk(1,iface_ref)
+            j = iface_all_midpointijk(2,iface_ref)
+            k = iface_all_midpointijk(3,iface_ref)
+            iglob_midpoint = ibool(i,j,k,ispec)
 
-          ! checks if points on this face are masked already
-          if( .not. mask_ibool(iglob_midpoint) .and. &
-              ( acoustic_flag(iglob_midpoint) >= 1 .and. elastic_flag(iglob_midpoint) >= 1) ) then
+            ! checks if points on this face are masked already
+            if( .not. mask_ibool(iglob_midpoint) .and. &
+                ( acoustic_flag(iglob_midpoint) >= 1 .and. elastic_flag(iglob_midpoint) >= 1) ) then
 
-            ! gets face GLL points i,j,k indices from element face
-            call get_element_face_gll_indices(iface_ref,ijk_face,NGLLX,NGLLY)
+              ! gets face GLL points i,j,k indices from element face
+              call get_element_face_gll_indices(iface_ref,ijk_face,NGLLX,NGLLY)
 
-            ! takes each element face only once, if it lies on an MPI interface
-            ! note: this is not exactly load balanced
-            !          lowest rank process collects as many faces as possible, second lowest as so forth
-            if( (test_flag(iglob_midpoint) == myrank+1) .or. &
-               (test_flag(iglob_midpoint) > 2*(myrank+1)) ) then
 
-              ! gets face GLL 2Djacobian, weighted from element face
-              call get_jacobian_boundary_face(myrank,nspec, &
-                        xstore_dummy,ystore_dummy,zstore_dummy,ibool,nglob, &
-                        dershape2D_x,dershape2D_y,dershape2D_bottom,dershape2D_top, &
-                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                        ispec,iface_ref,jacobian2Dw_face,normal_face,NGLLX,NGLLY)
+              ! takes each element face only once, if it lies on an MPI interface
+              ! note: this is not exactly load balanced
+              !          lowest rank process collects as many faces as possible, second lowest and so forth
+              !if( (test_flag(iglob_midpoint) == myrank+1) .or. &
+              !   (test_flag(iglob_midpoint) > 2*(myrank+1)) ) then
 
-              ! normal convention: points away from acoustic, reference element
-              !                                switch normal direction if necessary
-              do j=1,NGLLY
-                do i=1,NGLLX
-                    ! directs normals such that they point outwards of element
-                    call get_element_face_normal(ispec,iface_ref,xcoord,ycoord,zcoord, &
-                                                ibool,nspec,nglob, &
-                                                xstore_dummy,ystore_dummy,zstore_dummy, &
-                                                normal_face(:,i,j) )
-                    ! makes sure that it always points away from acoustic element,
-                    ! otherwise switch direction
-                    if( ispec_is_elastic(ispec) ) normal_face(:,i,j) = - normal_face(:,i,j)
+
+                ! gets face GLL 2Djacobian, weighted from element face
+                call get_jacobian_boundary_face(myrank,nspec, &
+                          xstore_dummy,ystore_dummy,zstore_dummy,ibool,nglob, &
+                          dershape2D_x,dershape2D_y,dershape2D_bottom,dershape2D_top, &
+                          wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                          ispec,iface_ref,jacobian2Dw_face,normal_face,NGLLX,NGLLY)
+
+                ! normal convention: points away from acoustic, reference element
+                !                                switch normal direction if necessary
+                do j=1,NGLLY
+                  do i=1,NGLLX
+                      ! directs normals such that they point outwards of element
+                      call get_element_face_normal(ispec,iface_ref,xcoord,ycoord,zcoord, &
+                                                  ibool,nspec,nglob, &
+                                                  xstore_dummy,ystore_dummy,zstore_dummy, &
+                                                  normal_face(:,i,j) )
+                      ! makes sure that it always points away from acoustic element,
+                      ! otherwise switch direction
+                      ! note: this should not happen, since we only loop over acoustic elements
+                      !if( ispec_is_elastic(ispec) ) normal_face(:,i,j) = - normal_face(:,i,j)
+                      if( ispec_is_elastic(ispec) ) stop 'error acoustic-elastic coupling surface'
+                  enddo
                 enddo
-              enddo
 
-              ! stores informations about this face
-              inum = inum + 1
-              tmp_ispec(inum) = ispec
-              igll = 0
-              do j=1,NGLLY
-                do i=1,NGLLX
-                  ! adds all gll points on this face
-                  igll = igll + 1
+                ! stores informations about this face
+                inum = inum + 1
+                tmp_ispec(inum) = ispec
+                igll = 0
+                do j=1,NGLLY
+                  do i=1,NGLLX
+                    ! adds all gll points on this face
+                    igll = igll + 1
 
-                  ! do we need to store local i,j,k,ispec info? or only global indices iglob?
-                  tmp_ijk(:,igll,inum) = ijk_face(:,i,j)
+                    ! do we need to store local i,j,k,ispec info? or only global indices iglob?
+                    tmp_ijk(:,igll,inum) = ijk_face(:,i,j)
 
-                  ! stores weighted jacobian and normals
-                  tmp_jacobian2Dw(igll,inum) = jacobian2Dw_face(i,j)
-                  tmp_normal(:,igll,inum) = normal_face(:,i,j)
+                    ! stores weighted jacobian and normals
+                    tmp_jacobian2Dw(igll,inum) = jacobian2Dw_face(i,j)
+                    tmp_normal(:,igll,inum) = normal_face(:,i,j)
 
-                  ! masks global points ( to avoid redundant counting of faces)
-                  iglob = ibool(ijk_face(1,i,j),ijk_face(2,i,j),ijk_face(3,i,j),ispec)
-                  mask_ibool(iglob) = .true.
+                    ! masks global points ( to avoid redundant counting of faces)
+                    iglob = ibool(ijk_face(1,i,j),ijk_face(2,i,j),ijk_face(3,i,j),ispec)
+                    mask_ibool(iglob) = .true.
+                  enddo
                 enddo
-              enddo
-            else
-              ! assumes to be already collected by lower rank process, masks face points
-              do j=1,NGLLY
-                do i=1,NGLLX
-                  iglob = ibool(ijk_face(1,i,j),ijk_face(2,i,j),ijk_face(3,i,j),ispec)
-                  mask_ibool(iglob) = .true.
-                enddo
-              enddo
-            endif ! test_flag
-          endif ! mask_ibool
-        endif ! elastic_flag
-      endif ! acoustic_flag
-    enddo ! iface_ref
+
+              ! test_flags shouldn't matter, there is only 1 acoustic element touching a coupled surface
+              ! which will be considered in the MPI partition which contains it
+              !else
+              !  ! assumes to be already collected by lower rank process, masks face points
+              !  do j=1,NGLLY
+              !    do i=1,NGLLX
+              !      iglob = ibool(ijk_face(1,i,j),ijk_face(2,i,j),ijk_face(3,i,j),ispec)
+              !      mask_ibool(iglob) = .true.
+              !    enddo
+              !  enddo
+              !endif ! test_flag
+
+            endif ! mask_ibool
+          endif ! elastic_flag
+        endif ! acoustic_flag
+      enddo ! iface_ref
+    endif ! ispec_is_acoustic
   enddo ! ispec
 
 ! stores completed coupling face informations

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_perm_color.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_perm_color.f90	2011-11-16 22:35:56 UTC (rev 19211)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/generate_databases/get_perm_color.f90	2011-11-17 01:58:40 UTC (rev 19212)
@@ -68,24 +68,23 @@
   ! local variables
   integer :: nb_colors
 
-  ! coloring algorithm
+  ! coloring algorithm w/ Droux
   call get_color_faster(ibool, is_on_a_slice_edge, ispec_is_d, &
                         myrank, nspec, nglob, &
                         color, nb_colors_outer_elements, nb_colors_inner_elements, &
                         nspec_outer,nspec_inner,nspec_domain)
 
-  !debug
-  !if(myrank == 0) then
-  !  print*, 'rank :',myrank,' - colors:'
-  !  print*, '   number of colors for inner elements = ',nb_colors_inner_elements
-  !  print*, '   number of colors for outer elements = ',nb_colors_outer_elements
-  !  print*, '   total number of colors (sum of both) = ', nb_colors_inner_elements + nb_colors_outer_elements
-  !  print*, 'rank :',myrank,' - elements:'
-  !  print*, '   number of elements for outer elements  = ',nspec_outer
-  !  print*, '   number of elements for inner elements  = ',nspec_inner
-  !  print*, '   total number of elements for domain elements  = ',nspec_domain
-  !endif
-  !if(myrank == 0) print*, '  generating the final colors'
+  !debug output
+  if(myrank == 0) then
+    write(IMAIN,*) '     colors:'
+    write(IMAIN,*) '     number of colors for inner elements = ',nb_colors_inner_elements
+    write(IMAIN,*) '     number of colors for outer elements = ',nb_colors_outer_elements
+    write(IMAIN,*) '     total number of colors (sum of both) = ', nb_colors_inner_elements + nb_colors_outer_elements
+    write(IMAIN,*) '     elements:'
+    write(IMAIN,*) '     number of elements for outer elements  = ',nspec_outer
+    write(IMAIN,*) '     number of elements for inner elements  = ',nspec_inner
+    write(IMAIN,*) '     total number of elements for domain elements  = ',nspec_domain
+  endif
 
   ! total number of colors used
   nb_colors = nb_colors_inner_elements+nb_colors_outer_elements
@@ -96,8 +95,6 @@
                      nspec,nb_colors,nb_colors_outer_elements, &
                      ispec_is_d,nspec_domain)
 
-  !debug
-  !if(myrank == 0) print*, '  done with mesh coloring and inner/outer element splitting'
 
   end subroutine get_perm_color_faster
 
@@ -130,10 +127,24 @@
   integer :: icolor, nb_already_colored
   integer :: iglob1,iglob2,iglob3,iglob4,iglob5,iglob6,iglob7,iglob8
   logical :: conflict_found_need_new_color
+  !! DK DK Nov 2011 added this
+  ! Droux
+  logical, dimension(:), allocatable :: icolor_conflict_found
+  integer, dimension(:), allocatable :: nb_elems_in_this_color
+  logical :: try_Droux_coloring
+  integer :: ispec2,ncolors,icolormin,icolormax,icolor_chosen,nb_elems_in_color_chosen
+  integer :: nb_tries_of_Droux_1993,last_ispec_studied
+  ! valence
+  integer, dimension(:), allocatable :: count_ibool
+  integer :: maxval_count_ibool_outer,maxval_count_ibool_inner
 
   ! user output
   if( myrank == 0 ) then
-    write(IMAIN,*) '     fast coloring mesh algorithm'
+    if( USE_DROUX_OPTIMIZATION ) then
+      write(IMAIN,*) '     fast coloring mesh algorithm w/ Droux optimization'
+    else
+      write(IMAIN,*) '     fast coloring mesh algorithm'
+    endif
   endif
 
   ! counts number of elements for inner, outer and total domain
@@ -167,16 +178,24 @@
   !  print *
   !endif
 
-  ! first set color of all elements to 0
+  ! Droux optimization
+  try_Droux_coloring = USE_DROUX_OPTIMIZATION
+  nb_tries_of_Droux_1993 = 1
+
+  ! entry point for fail-safe mechanism when Droux 1993 fails
+  999 continue
+
+  ! first set color of all elements to 0,
+  ! to use it as a flag to detect elements not yet colored
   color(:) = 0
   icolor = 0
   nb_already_colored = 0
 
   ! colors outer elements
   do while( nb_already_colored < nspec_outer )
-    icolor = icolor + 1
 
     333 continue
+    icolor = icolor + 1
 
     ! debug: user output
     !if(myrank == 0) then
@@ -232,7 +251,6 @@
     !endif
 
     if(conflict_found_need_new_color) then
-      icolor = icolor + 1
       if( icolor >= MAX_NUMBER_OF_COLORS ) stop 'error MAX_NUMBER_OF_COLORS too small'
       goto 333
     endif
@@ -242,9 +260,10 @@
 
   ! colors inner elements
   do while(nb_already_colored < nspec_domain)
+
+    334 continue
     icolor = icolor + 1
 
-    334 continue
     !if(myrank == 0) print *,'analyzing color ',icolor,' for all the elements of the mesh'
 
     ! debug: user output
@@ -300,7 +319,6 @@
     !endif
 
     if(conflict_found_need_new_color) then
-      icolor = icolor + 1
       if( icolor >= MAX_NUMBER_OF_COLORS ) stop 'error MAX_NUMBER_OF_COLORS too small'
       goto 334
     endif
@@ -308,13 +326,235 @@
 
   nb_colors_inner_elements = icolor - nb_colors_outer_elements
 
-  ! debug
-  !if(myrank == 0) then
-  !  print *
-  !  print *,'  created a total of ',maxval(color),' colors in this domain' ! 'for all the domain elements of the mesh'
-  !  print *
-  !endif
+!!!!!!!!!! DK DK Nov 2011 added this to create more balanced colors according to JJ Droux (1993)
 
+  ! Droux optimization: might not find an optimial solution.
+  ! we will probably have to try a few times with increasing colors
+  if(try_Droux_coloring) then
+
+    ! debug outupt
+    if( myrank == 0 ) then
+      write(IMAIN,*) '     fast algorithm: number of outer element colors = ',nb_colors_outer_elements
+      write(IMAIN,*) '     fast algorithm: number of inner element colors = ',nb_colors_inner_elements,' total:', &
+        nb_colors_outer_elements + nb_colors_inner_elements
+      write(IMAIN,*) '     fast algorithm: number of total colors = ',nb_colors_outer_elements + nb_colors_inner_elements
+    endif
+
+    !! DK DK Nov 2011: give a lower bound for the number of colors needed
+    allocate( count_ibool(nglob))
+
+    ! valence numbers of the mesh
+    maxval_count_ibool_outer = 0
+    maxval_count_ibool_inner = 0
+
+    ! valence for outer elements
+    count_ibool(:) = 0
+    do ispec = 1,nspec
+      ! domain elements only
+      if(ispec_is_d(ispec)) then
+        ! outer elements
+        if (is_on_a_slice_edge(ispec)) then
+          ! the eight corners of the current element
+          iglob1=ibool(1,1,1,ispec)
+          iglob2=ibool(NGLLX,1,1,ispec)
+          iglob3=ibool(NGLLX,NGLLY,1,ispec)
+          iglob4=ibool(1,NGLLY,1,ispec)
+          iglob5=ibool(1,1,NGLLZ,ispec)
+          iglob6=ibool(NGLLX,1,NGLLZ,ispec)
+          iglob7=ibool(NGLLX,NGLLY,NGLLZ,ispec)
+          iglob8=ibool(1,NGLLY,NGLLZ,ispec)
+
+          count_ibool(iglob1) = count_ibool(iglob1) + 1
+          count_ibool(iglob2) = count_ibool(iglob2) + 1
+          count_ibool(iglob3) = count_ibool(iglob3) + 1
+          count_ibool(iglob4) = count_ibool(iglob4) + 1
+          count_ibool(iglob5) = count_ibool(iglob5) + 1
+          count_ibool(iglob6) = count_ibool(iglob6) + 1
+          count_ibool(iglob7) = count_ibool(iglob7) + 1
+          count_ibool(iglob8) = count_ibool(iglob8) + 1
+        endif
+      endif
+    enddo
+    maxval_count_ibool_outer = maxval(count_ibool)
+
+    ! valence for inner elements
+    count_ibool(:) = 0
+    do ispec = 1,nspec
+      ! domain elements only
+      if(ispec_is_d(ispec)) then
+        ! inner elements
+        if (.not. is_on_a_slice_edge(ispec)) then
+          ! the eight corners of the current element
+          iglob1=ibool(1,1,1,ispec)
+          iglob2=ibool(NGLLX,1,1,ispec)
+          iglob3=ibool(NGLLX,NGLLY,1,ispec)
+          iglob4=ibool(1,NGLLY,1,ispec)
+          iglob5=ibool(1,1,NGLLZ,ispec)
+          iglob6=ibool(NGLLX,1,NGLLZ,ispec)
+          iglob7=ibool(NGLLX,NGLLY,NGLLZ,ispec)
+          iglob8=ibool(1,NGLLY,NGLLZ,ispec)
+
+          count_ibool(iglob1) = count_ibool(iglob1) + 1
+          count_ibool(iglob2) = count_ibool(iglob2) + 1
+          count_ibool(iglob3) = count_ibool(iglob3) + 1
+          count_ibool(iglob4) = count_ibool(iglob4) + 1
+          count_ibool(iglob5) = count_ibool(iglob5) + 1
+          count_ibool(iglob6) = count_ibool(iglob6) + 1
+          count_ibool(iglob7) = count_ibool(iglob7) + 1
+          count_ibool(iglob8) = count_ibool(iglob8) + 1
+        endif
+      endif
+    enddo
+    maxval_count_ibool_inner = maxval(count_ibool)
+
+    ! debug outupt
+    if( myrank == 0 ) then
+      write(IMAIN,*) '     maximum valence (i.e. minimum possible nb of colors) for outer = ',maxval_count_ibool_outer
+      write(IMAIN,*) '     maximum valence (i.e. minimum possible nb of colors) for inner = ',maxval_count_ibool_inner
+    endif
+
+    deallocate(count_ibool)
+
+    ! initial guess of number of colors needed
+    if( maxval_count_ibool_inner > 0 .and. maxval_count_ibool_inner < nb_colors_inner_elements ) then
+      ! uses maximum valence to estimate number of colors for Droux
+      nb_colors_inner_elements = maxval_count_ibool_inner
+    endif
+
+    !! DK DK Nov 2011: give a lower bound for the number of colors needed
+
+    !! DK DK do it for inner elements only for now
+    nb_already_colored = nspec_outer
+
+    765 continue
+
+    ! initial guess of number of colors needed
+    ncolors = nb_colors_outer_elements + nb_colors_inner_elements
+
+    ! debug output
+    if( myrank == 0 ) then
+      write(IMAIN,*) '     Droux optimization: try = ',nb_tries_of_Droux_1993,'colors = ',ncolors
+    endif
+
+    icolormin = nb_colors_outer_elements + 1
+    icolormax = ncolors
+
+    allocate(nb_elems_in_this_color(ncolors))
+    allocate(icolor_conflict_found(ncolors))
+
+    nb_elems_in_this_color(:) = 0
+    mask_ibool(:) = .false.
+    last_ispec_studied = -1
+
+    do ispec = 1,nspec
+      ! domain elements only
+      if(ispec_is_d(ispec)) then
+
+        ! only inner elements
+        if (is_on_a_slice_edge(ispec)) cycle
+
+        ! unmark the eight corners of the previously marked element
+        if(last_ispec_studied > 0) then
+          mask_ibool(ibool(1,1,1,last_ispec_studied)) = .false.
+          mask_ibool(ibool(NGLLX,1,1,last_ispec_studied)) = .false.
+          mask_ibool(ibool(NGLLX,NGLLY,1,last_ispec_studied)) = .false.
+          mask_ibool(ibool(1,NGLLY,1,last_ispec_studied)) = .false.
+          mask_ibool(ibool(1,1,NGLLZ,last_ispec_studied)) = .false.
+          mask_ibool(ibool(NGLLX,1,NGLLZ,last_ispec_studied)) = .false.
+          mask_ibool(ibool(NGLLX,NGLLY,NGLLZ,last_ispec_studied)) = .false.
+          mask_ibool(ibool(1,NGLLY,NGLLZ,last_ispec_studied)) = .false.
+        endif
+        icolor_conflict_found(icolormin:icolormax) = .false.
+
+        ! mark the eight corners of the current element
+        mask_ibool(ibool(1,1,1,ispec)) = .true.
+        mask_ibool(ibool(NGLLX,1,1,ispec)) = .true.
+        mask_ibool(ibool(NGLLX,NGLLY,1,ispec)) = .true.
+        mask_ibool(ibool(1,NGLLY,1,ispec)) = .true.
+        mask_ibool(ibool(1,1,NGLLZ,ispec)) = .true.
+        mask_ibool(ibool(NGLLX,1,NGLLZ,ispec)) = .true.
+        mask_ibool(ibool(NGLLX,NGLLY,NGLLZ,ispec)) = .true.
+        mask_ibool(ibool(1,NGLLY,NGLLZ,ispec)) = .true.
+        last_ispec_studied = ispec
+
+        if(ispec > 1) then
+          do ispec2 = 1,ispec - 1
+            ! domain elements only
+            if(ispec_is_d(ispec)) then
+
+              ! only inner elements
+              if (is_on_a_slice_edge(ispec2)) cycle
+
+              ! if conflict already found previously with this color, no need to test again
+              if (icolor_conflict_found(color(ispec2))) cycle
+
+              ! test the eight corners of the current element for a common point with element under study
+              if (mask_ibool(ibool(1,1,1,ispec2)) .or. &
+                  mask_ibool(ibool(NGLLX,1,1,ispec2)) .or. &
+                  mask_ibool(ibool(NGLLX,NGLLY,1,ispec2)) .or. &
+                  mask_ibool(ibool(1,NGLLY,1,ispec2)) .or. &
+                  mask_ibool(ibool(1,1,NGLLZ,ispec2)) .or. &
+                  mask_ibool(ibool(NGLLX,1,NGLLZ,ispec2)) .or. &
+                  mask_ibool(ibool(NGLLX,NGLLY,NGLLZ,ispec2)) .or. &
+                  mask_ibool(ibool(1,NGLLY,NGLLZ,ispec2))) &
+                icolor_conflict_found(color(ispec2)) = .true.
+            endif ! domain elements
+          enddo
+        endif
+
+        ! check if the Droux 1993 algorithm found a solution
+        if (all(icolor_conflict_found(icolormin:icolormax))) then
+          ! user output
+          !if(myrank == 0) write(IMAIN,*) '     Droux 1993 algorithm did not find any solution for ncolors = ',ncolors
+
+          ! try with one more color
+          if(nb_tries_of_Droux_1993 < MAX_NB_TRIES_OF_DROUX_1993) then
+            nb_colors_inner_elements = nb_colors_inner_elements + 1
+            deallocate(nb_elems_in_this_color)
+            deallocate(icolor_conflict_found)
+            nb_tries_of_Droux_1993 = nb_tries_of_Droux_1993 + 1
+            goto 765
+          else
+            ! fail-safe mechanism: if Droux 1993 still fails after all the tries with one more color,
+            ! then go back to my original simple and fast coloring algorithm
+            try_Droux_coloring = .false.
+            if(myrank == 0) write(IMAIN,*) '     giving up on Droux 1993 algorithm, calling fail-safe mechanism'
+            goto 999
+          endif
+        endif
+
+        ! loop on all the colors to determine the color with the smallest number
+        ! of elements and for which there is no conflict
+        nb_elems_in_color_chosen = 2147000000 ! start with extremely large unrealistic value
+        do icolor = icolormin,icolormax
+          if (.not. icolor_conflict_found(icolor) .and. nb_elems_in_this_color(icolor) < nb_elems_in_color_chosen) then
+            icolor_chosen = icolor
+            nb_elems_in_color_chosen = nb_elems_in_this_color(icolor)
+          endif
+        enddo
+
+        ! store the color finally chosen
+        color(ispec) = icolor_chosen
+        nb_elems_in_this_color(icolor_chosen) = nb_elems_in_this_color(icolor_chosen) + 1
+
+      endif ! domain elements
+    enddo
+
+    ! debug output
+    if(myrank == 0) then
+      write(IMAIN,*) '     created a total of ',maxval(color),' colors in this domain' ! 'for all the domain elements of the mesh'
+      if( nb_colors_outer_elements > 0 ) &
+        write(IMAIN,*) '     typical nb of elements per color for outer elements should be ', &
+          nspec_outer / nb_colors_outer_elements
+      if( nb_colors_inner_elements > 0 ) &
+        write(IMAIN,*) '     typical nb of elements per color for inner elements should be ', &
+          nspec_inner / nb_colors_inner_elements
+    endif
+
+  endif ! of if(try_Droux_coloring)
+
+!!!!!!!!!! DK DK Nov 2011 added this again to create more balanced colors according to JJ Droux (1993)
+
   !!!!!!!! DK DK now check that all the color sets are independent
   do icolor = 1,maxval(color)
     mask_ibool(:) = .false.
@@ -352,25 +592,26 @@
       endif
     enddo
 
-    !debug
+    !debug output
     !if(myrank == 0) print *,'  color ',icolor,' has disjoint elements only and is therefore OK'
     !if(myrank == 0) print *,'  color ',icolor,' contains ',count(color == icolor),' elements'
   enddo
 
-  ! debug
-  !if(myrank == 0) then
-  !  print *,'  the ',maxval(color),' color sets are OK'
-  !  print *
-  !endif
+  ! debug output
+  if(myrank == 0) then
+    !print*, '     the ',maxval(color),' color sets are OK'
+  endif
 
   deallocate(mask_ibool)
 
   end subroutine get_color_faster
 
+
 !
 !-------------------------------------------------------------------------------------------------
 !
 
+
   subroutine get_final_perm(color,perm,first_elem_number_in_this_color, &
                             nspec,nb_colors,nb_colors_outer_elements, &
                             ispec_is_d,nspec_domain)

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in	2011-11-16 22:35:56 UTC (rev 19211)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/constants.h.in	2011-11-17 01:58:40 UTC (rev 19212)
@@ -216,9 +216,6 @@
 ! that completes an orthonormal.
   logical, parameter :: EXT_MESH_RECV_NORMAL = .false.
 
-! shakemaps and movies can not be generated during the same run. Mutually exclusive.
-  logical, parameter :: EXTERNAL_MESH_MOVIE_SURFACE = .false.
-  logical, parameter :: EXTERNAL_MESH_CREATE_SHAKEMAP = .false.
 
 !!-----------------------------------------------------------
 !!
@@ -226,6 +223,10 @@
 !!
 !!-----------------------------------------------------------
 
+! shakemaps and movies can not be generated during the same run. Mutually exclusive.
+  logical, parameter :: EXTERNAL_MESH_MOVIE_SURFACE = .false.
+  logical, parameter :: EXTERNAL_MESH_CREATE_SHAKEMAP = .false.
+
 ! plots VTK cross-section planes instead of model surface
 ! (EXPERIMENTAL feature)
 ! (requires EXTERNAL_MESH_MOVIE_SURFACE set to true)
@@ -257,6 +258,11 @@
   logical, parameter :: USE_MESH_COLORING_GPU = .false.
   integer, parameter :: MAX_NUMBER_OF_COLORS = 10000
 
+! enhanced coloring using Droux algorithm
+! try several times with one more color before giving up
+  logical, parameter :: USE_DROUX_OPTIMIZATION = .false.
+  integer, parameter :: MAX_NB_TRIES_OF_DROUX_1993 = 15
+
 !------------------------------------------------------
 !----------- do not modify anything below -------------
 !------------------------------------------------------
@@ -358,12 +364,44 @@
 ! flag for projection from latitude/longitude to UTM, and back
   integer, parameter :: ILONGLAT2UTM = 0, IUTM2LONGLAT = 1
 
+!!-----------------------------------------------------------
+!!
+!! OCEANS load approximation
+!!
+!!-----------------------------------------------------------
 ! minimum thickness in meters to include the effect of the oceans
 ! to avoid taking into account spurious oscillations in topography model
   double precision, parameter :: MINIMUM_THICKNESS_3D_OCEANS = 10.d0
 ! density of sea water
   real(kind=CUSTOM_REAL), parameter :: RHO_OCEANS = 1020.0
 
+!!-----------------------------------------------------------
+!!
+!! GRAVITY
+!!
+!!-----------------------------------------------------------
+! gravitational constant
+  double precision, parameter :: GRAV = 6.6723d-11
+! number of layers in PREM
+  integer, parameter :: NR = 640
+! for lookup table for gravity every 100 m in radial direction of Earth model
+  integer, parameter :: NRAD_GRAVITY = 70000
+! R_EARTH is the radius of the bottom of the oceans (radius of Earth in m)
+  double precision, parameter :: R_EARTH = 6371000.d0
+! same radius in km
+  double precision, parameter :: R_EARTH_KM = R_EARTH / 1000.d0
+! radius of the Earth for gravity calculation
+  double precision, parameter :: R_EARTH_GRAVITY = 6371000.d0
+! radius of the ocean floor for gravity calculation
+  double precision, parameter :: ROCEAN_GRAVITY = 6368000.d0
+! average density in the full Earth to normalize equation
+  double precision, parameter :: RHOAV = 5514.3d0
+
+!!-----------------------------------------------------------
+!!
+!! DOMAINS
+!!
+!!-----------------------------------------------------------
 ! material domain ids
   integer, parameter :: IDOMAIN_ACOUSTIC    = 1
   integer, parameter :: IDOMAIN_ELASTIC     = 2

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/create_movie_shakemap_AVS_DX_GMT.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/create_movie_shakemap_AVS_DX_GMT.f90	2011-11-16 22:35:56 UTC (rev 19211)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/create_movie_shakemap_AVS_DX_GMT.f90	2011-11-17 01:58:40 UTC (rev 19212)
@@ -35,10 +35,14 @@
   implicit none
 
   include "constants.h"
-  include "surface_from_mesher.h"
+  include "../../in_out_files/OUTPUT_FILES/surface_from_mesher.h"
 
 !-------------------------------------------------------------------------------------------------
 ! user parameters
+
+! normalizes field display values
+  logical, parameter :: NORMALIZE_OUTPUT = .false.
+
 ! threshold in percent of the maximum below which we cut the amplitude
   logical, parameter :: APPLY_THRESHOLD = .false.
   real(kind=CUSTOM_REAL), parameter :: THRESHOLD = 1._CUSTOM_REAL / 100._CUSTOM_REAL
@@ -548,44 +552,44 @@
       min_field_current = minval(field_display(:))
       max_field_current = maxval(field_display(:))
 
-      ! print minimum and maximum amplitude in current snapshot
-      print *
-      print *,'minimum amplitude in current snapshot = ',min_field_current
-      print *,'maximum amplitude in current snapshot = ',max_field_current
-      print *
-
       if(plot_shaking_map) then
-        ! compute min and max of data value to normalize
-        min_field_current = minval(field_display(:))
-        max_field_current = maxval(field_display(:))
         ! print minimum and maximum amplitude in current snapshot
         print *
         print *,'minimum amplitude in current snapshot after removal = ',min_field_current
         print *,'maximum amplitude in current snapshot after removal = ',max_field_current
         print *
+      else
+        ! print minimum and maximum amplitude in current snapshot
+        print *
+        print *,'minimum amplitude in current snapshot = ',min_field_current
+        print *,'maximum amplitude in current snapshot = ',max_field_current
+        print *
       endif
 
       ! apply scaling in all cases for movies
       if(.not. plot_shaking_map) then
 
-        ! make sure range is always symmetric and center is in zero
-        ! this assumption works only for fields that can be negative
-        ! would not work for norm of vector for instance
-        ! (we would lose half of the color palette if no negative values)
-        max_absol = max(abs(min_field_current),abs(max_field_current))
-        min_field_current = - max_absol
-        max_field_current = + max_absol
+        ! normalizes values
+        if( NORMALIZE_OUTPUT ) then
+          ! make sure range is always symmetric and center is in zero
+          ! this assumption works only for fields that can be negative
+          ! would not work for norm of vector for instance
+          ! (we would lose half of the color palette if no negative values)
+          max_absol = max(abs(min_field_current),abs(max_field_current))
+          min_field_current = - max_absol
+          max_field_current = + max_absol
 
-        ! normalize field to [0:1]
-        if( abs(max_field_current - min_field_current) > TINYVAL ) &
-          field_display(:) = (field_display(:) - min_field_current) / (max_field_current - min_field_current)
+          ! normalize field to [0:1]
+          if( abs(max_field_current - min_field_current) > TINYVAL ) &
+            field_display(:) = (field_display(:) - min_field_current) / (max_field_current - min_field_current)
 
-        ! rescale to [-1,1]
-        field_display(:) = 2.*field_display(:) - 1.
+          ! rescale to [-1,1]
+          field_display(:) = 2.*field_display(:) - 1.
 
-        ! apply threshold to normalized field
-        if(APPLY_THRESHOLD) &
-          where(abs(field_display(:)) <= THRESHOLD) field_display = 0.
+          ! apply threshold to normalized field
+          if(APPLY_THRESHOLD) &
+            where(abs(field_display(:)) <= THRESHOLD) field_display = 0.
+        endif
 
         ! apply non linear scaling to normalized field if needed
         if(NONLINEAR_SCALING) then
@@ -596,13 +600,15 @@
           endwhere
         endif
 
-        ! map back to [0,1]
-        field_display(:) = (field_display(:) + 1.) / 2.
+        ! normalizes values
+        if( NORMALIZE_OUTPUT ) then
+          ! map back to [0,1]
+          field_display(:) = (field_display(:) + 1.) / 2.
 
-        ! map field to [0:255] for AVS color scale
-        field_display(:) = 255. * field_display(:)
+          ! map field to [0:255] for AVS color scale
+          field_display(:) = 255. * field_display(:)
+        endif
 
-
       ! apply scaling only if selected for shaking map
       else if(NONLINEAR_SCALING .and. iscaling_shake == 1) then
 
@@ -668,21 +674,21 @@
 
       if(USE_GMT) then
 
-! output list of points
-         mask_point = .false.
-         do ispec=1,nspectot_AVS_max
-            ieoff = NGNOD2D_AVS_DX*(ispec-1)
-! four points for each element
-            do ilocnum = 1,NGNOD2D_AVS_DX
-               ibool_number = iglob(ilocnum+ieoff)
-               if(.not. mask_point(ibool_number)) then
-                  call utm_geo(long,lat,xp_save(ilocnum+ieoff),yp_save(ilocnum+ieoff), &
-                       UTM_PROJECTION_ZONE,IUTM2LONGLAT,SUPPRESS_UTM_PROJECTION)
-                  write(11,*) long,lat,field_display(ilocnum+ieoff)
-               endif
-               mask_point(ibool_number) = .true.
-            enddo
-         enddo
+        ! output list of points
+        mask_point = .false.
+        do ispec=1,nspectot_AVS_max
+          ieoff = NGNOD2D_AVS_DX*(ispec-1)
+          ! four points for each element
+          do ilocnum = 1,NGNOD2D_AVS_DX
+            ibool_number = iglob(ilocnum+ieoff)
+            if(.not. mask_point(ibool_number)) then
+              call utm_geo(long,lat,xp_save(ilocnum+ieoff),yp_save(ilocnum+ieoff), &
+                      UTM_PROJECTION_ZONE,IUTM2LONGLAT,SUPPRESS_UTM_PROJECTION)
+              write(11,*) long,lat,field_display(ilocnum+ieoff)
+            endif
+            mask_point(ibool_number) = .true.
+          enddo
+        enddo
 
       else
 

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/read_parameter_file.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/read_parameter_file.f90	2011-11-16 22:35:56 UTC (rev 19211)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/shared/read_parameter_file.f90	2011-11-17 01:58:40 UTC (rev 19212)
@@ -218,21 +218,25 @@
 !-------------------------------------------------------------------------------------------------
 !
 
-  subroutine read_gpu_mode(GPU_MODE)
+  subroutine read_gpu_mode(GPU_MODE,GRAVITY)
 
   implicit none
   include "constants.h"
 
-  logical GPU_MODE
+  logical GPU_MODE,GRAVITY
+
+  ! initializes flags
+  GPU_MODE = .false.
+  GRAVITY = .false.
+
   ! opens file Par_file
-
   call open_parameter_file()
 
   call read_value_logical(GPU_MODE, 'solver.GPU_MODE')
+  call read_value_logical(GRAVITY, 'solver.GRAVITY')
 
   ! close parameter file
   call close_parameter_file()
 
   end subroutine read_gpu_mode
 
-

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/Makefile.in
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/Makefile.in	2011-11-16 22:35:56 UTC (rev 19211)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/Makefile.in	2011-11-17 01:58:40 UTC (rev 19212)
@@ -189,6 +189,7 @@
 	$O/save_adjoint_kernels.o \
 	$O/specfem3D.o \
 	$O/assemble_MPI_vector.o \
+	$O/make_gravity.o \
 	$O/noise_tomography.o 
 
 

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_gradient.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_gradient.f90	2011-11-16 22:35:56 UTC (rev 19211)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_gradient.f90	2011-11-17 01:58:40 UTC (rev 19212)
@@ -29,14 +29,18 @@
                         scalar_field, vector_field_element,&
                         hprime_xx,hprime_yy,hprime_zz, &
                         xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        ibool,rhostore)
+                        ibool,rhostore,GRAVITY)
 
 ! calculates gradient of given acoustic scalar (potential) field on all GLL points in one, single element
 ! note:
 !   displacement s = (rho)^{-1} \del \chi
 !   velocity          v = (rho)^{-1} \del \ddot \chi
 !
+!  in case of gravity:
+!   displacement s = \del \chi
+!   velocity          v = \del \ddot \chi
 ! returns: (1/rho) times gradient vector field (vector_field_element) in specified element
+!             or in gravity case, just gradient vector field
 
   implicit none
   include 'constants.h'
@@ -59,6 +63,8 @@
   real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLY) :: hprime_yy
   real(kind=CUSTOM_REAL), dimension(NGLLZ,NGLLZ) :: hprime_zz
 
+  logical :: GRAVITY
+
 ! local parameters
   real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl
   real(kind=CUSTOM_REAL) temp1l,temp2l,temp3l
@@ -100,7 +106,12 @@
         gammayl = gammay(i,j,k,ispec)
         gammazl = gammaz(i,j,k,ispec)
 
-        rho_invl = 1.0_CUSTOM_REAL / rhostore(i,j,k,ispec)
+        ! daniel: TODO - check gravity case here
+        if( GRAVITY ) then
+          rho_invl = 1.0_CUSTOM_REAL / rhostore(i,j,k,ispec)
+        else
+          rho_invl = 1.0_CUSTOM_REAL / rhostore(i,j,k,ispec)
+        endif
 
         ! derivatives of acoustic scalar potential field on GLL points
         vector_field_element(1,i,j,k) = (temp1l*xixl + temp2l*etaxl + temp3l*gammaxl) * rho_invl

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_kernels.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_kernels.f90	2011-11-16 22:35:56 UTC (rev 19211)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/compute_kernels.f90	2011-11-17 01:58:40 UTC (rev 19212)
@@ -200,13 +200,13 @@
                       b_potential_acoustic, b_displ_elm,&
                       hprime_xx,hprime_yy,hprime_zz, &
                       xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                      ibool,rhostore)
+                      ibool,rhostore,GRAVITY)
       ! adjoint fields: acceleration vector
       call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
                       potential_dot_dot_acoustic, accel_elm,&
                       hprime_xx,hprime_yy,hprime_zz, &
                       xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                      ibool,rhostore)
+                      ibool,rhostore,GRAVITY)
 
       do k = 1, NGLLZ
         do j = 1, NGLLY
@@ -271,14 +271,14 @@
                       potential_dot_dot_acoustic, accel_elm,&
                       hprime_xx,hprime_yy,hprime_zz, &
                       xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                      ibool,rhostore)
+                      ibool,rhostore,GRAVITY)
 
       ! adjoint fields: acceleration vector
       call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
                       b_potential_dot_dot_acoustic, b_accel_elm,&
                       hprime_xx,hprime_yy,hprime_zz, &
                       xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                      ibool,rhostore)
+                      ibool,rhostore,GRAVITY)
 
       do k = 1, NGLLZ
         do j = 1, NGLLY

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90	2011-11-16 22:35:56 UTC (rev 19211)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/create_color_image.f90	2011-11-17 01:58:40 UTC (rev 19212)
@@ -33,32 +33,32 @@
   ! USER PARAMETER
 
   ! image data output:
-  !   type = 1 : velocity V_x component
-  !   type = 2 : velocity V_y component
-  !   type = 3 : velocity V_z component
-  !   type = 4 : velocity V norm
-  integer,parameter:: IMAGE_TYPE = 4
+  !   type = 1 : displ/velocity x-component
+  !   type = 2 : displ/velocity y-component
+  !   type = 3 : displ/velocity z-component
+  !   type = 4 : displ/velocity norm
+  integer,parameter:: IMAGE_TYPE = 3 ! 4
 
   ! cross-section surface
   ! cross-section origin point
-  real(kind=CUSTOM_REAL),parameter:: section_xorg = 67000.0
-  real(kind=CUSTOM_REAL),parameter:: section_yorg = 0.0
-  real(kind=CUSTOM_REAL),parameter:: section_zorg = 0.0
+  real(kind=CUSTOM_REAL),parameter:: section_xorg = 0.0 ! 67000.0
+  real(kind=CUSTOM_REAL),parameter:: section_yorg = 0.0 ! 0.0
+  real(kind=CUSTOM_REAL),parameter:: section_zorg = -100.0 ! 0.0
 
   ! cross-section surface normal
-  real(kind=CUSTOM_REAL),parameter:: section_nx = 1.0
-  real(kind=CUSTOM_REAL),parameter:: section_ny = 0.0
-  real(kind=CUSTOM_REAL),parameter:: section_nz = 0.0
+  real(kind=CUSTOM_REAL),parameter:: section_nx = 0.0 !1.0
+  real(kind=CUSTOM_REAL),parameter:: section_ny = 0.0 !0.0
+  real(kind=CUSTOM_REAL),parameter:: section_nz = 1.0 !0.0
 
   ! cross-section (in-plane) horizontal-direction
-  real(kind=CUSTOM_REAL),parameter:: section_hdirx = 0.0
-  real(kind=CUSTOM_REAL),parameter:: section_hdiry = 1.0
-  real(kind=CUSTOM_REAL),parameter:: section_hdirz = 0.0
+  real(kind=CUSTOM_REAL),parameter:: section_hdirx = 1.0 ! 0.0
+  real(kind=CUSTOM_REAL),parameter:: section_hdiry = 0.0 !1.0
+  real(kind=CUSTOM_REAL),parameter:: section_hdirz = 0.0 ! 0.0
 
   ! cross-section (in-plane) vertical-direction
-  real(kind=CUSTOM_REAL),parameter:: section_vdirx = 0.0
-  real(kind=CUSTOM_REAL),parameter:: section_vdiry = 0.0
-  real(kind=CUSTOM_REAL),parameter:: section_vdirz = 1.0
+  real(kind=CUSTOM_REAL),parameter:: section_vdirx = 0.0 ! 0.0
+  real(kind=CUSTOM_REAL),parameter:: section_vdiry = 1.0 ! 0.0
+  real(kind=CUSTOM_REAL),parameter:: section_vdirz = 0.0 ! 1.0
 
   ! non linear display to enhance small amplitudes in color images
   real(kind=CUSTOM_REAL), parameter :: POWER_DISPLAY_COLOR = 0.30_CUSTOM_REAL
@@ -511,7 +511,7 @@
   implicit none
 
   ! local parameters
-  real(kind=CUSTOM_REAL),dimension(NDIM) :: veloc_val
+  real(kind=CUSTOM_REAL),dimension(NDIM) :: val_vector
   real(kind=CUSTOM_REAL):: temp
   integer :: i,j,k,iglob,ispec,iproc
 
@@ -528,15 +528,15 @@
     ispec = ispec_image_color(i,j)
 
     ! gets velocity for point iglob
-    call get_iglob_veloc(iglob,ispec,veloc_val)
+    call get_iglob_veloc(iglob,ispec,val_vector)
 
     ! data type
     if( IMAGE_TYPE == 4 ) then
       ! velocity norm
-      temp = sqrt( veloc_val(1)**2 + veloc_val(2)**2 + veloc_val(3)**2 )
+      temp = sqrt( val_vector(1)**2 + val_vector(2)**2 + val_vector(3)**2 )
     else
       ! velocity component
-      temp = veloc_val(IMAGE_TYPE)
+      temp = val_vector(IMAGE_TYPE)
     endif
 
     ! stores data
@@ -834,34 +834,48 @@
 
 !=============================================================
 
-  subroutine get_iglob_veloc(iglob,ispec,veloc_val)
+  subroutine get_iglob_veloc(iglob,ispec,val_vector)
 
   use constants,only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,NDIM
-  use specfem_par_acoustic,only: ACOUSTIC_SIMULATION,potential_dot_acoustic,&
-                                rhostore,ispec_is_acoustic,b_potential_dot_acoustic
-  use specfem_par_elastic,only: ELASTIC_SIMULATION,veloc,ispec_is_elastic,b_veloc
+  use specfem_par_acoustic,only: ACOUSTIC_SIMULATION,potential_acoustic,potential_dot_acoustic, &
+                                rhostore,ispec_is_acoustic, &
+                                b_potential_acoustic,b_potential_dot_acoustic
+  use specfem_par_elastic,only: ELASTIC_SIMULATION,displ,veloc, &
+                                ispec_is_elastic,b_displ,b_veloc
   use specfem_par,only: NSPEC_AB,NGLOB_AB,hprime_xx,hprime_yy,hprime_zz, &
                         xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        ibool,SIMULATION_TYPE
+                        ibool,SIMULATION_TYPE,GRAVITY
+  use specfem_par_movie,only:SAVE_DISPLACEMENT
   implicit none
 
   integer,intent(in) :: iglob,ispec
-  real(kind=CUSTOM_REAL),dimension(NDIM),intent(out):: veloc_val
+  real(kind=CUSTOM_REAL),dimension(NDIM),intent(out):: val_vector
 
   ! local parameters
-  real(kind=CUSTOM_REAL),dimension(NDIM,NGLLX,NGLLY,NGLLZ):: veloc_element
+  real(kind=CUSTOM_REAL),dimension(NDIM,NGLLX,NGLLY,NGLLZ):: val_element
   integer :: i,j,k
 
   ! returns first element encountered for iglob index
   if( ELASTIC_SIMULATION ) then
     if( ispec_is_elastic(ispec) ) then
-      if( SIMULATION_TYPE == 3 ) then
-        ! to display re-constructed wavefield
-        !veloc_val(:) = b_veloc(:,iglob)
-        ! to display adjoint wavefield
-        veloc_val(:) = veloc(:,iglob)
+      if(SAVE_DISPLACEMENT) then
+        if( SIMULATION_TYPE == 3 ) then
+          ! to display re-constructed wavefield
+          !val_vector(:) = b_displ(:,iglob)
+          ! to display adjoint wavefield
+          val_vector(:) = displ(:,iglob)
+        else
+          val_vector(:) = displ(:,iglob)
+        endif
       else
-        veloc_val(:) = veloc(:,iglob)
+        if( SIMULATION_TYPE == 3 ) then
+          ! to display re-constructed wavefield
+          !val_vector(:) = b_veloc(:,iglob)
+          ! to display adjoint wavefield
+          val_vector(:) = veloc(:,iglob)
+        else
+          val_vector(:) = veloc(:,iglob)
+        endif
       endif
 
       ! returns with this result
@@ -871,20 +885,38 @@
 
   if( ACOUSTIC_SIMULATION ) then
     if( ispec_is_acoustic(ispec) ) then
-      if( SIMULATION_TYPE == 3 ) then
-        ! velocity vector for backward/reconstructed wavefield
-        call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
-                          b_potential_dot_acoustic, veloc_element,&
+      if(SAVE_DISPLACEMENT) then
+        if( SIMULATION_TYPE == 3 ) then
+          ! displacement vector from backward potential
+          call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
+                          b_potential_acoustic, val_element,&
                           hprime_xx,hprime_yy,hprime_zz, &
                           xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                          ibool,rhostore)
+                          ibool,rhostore,GRAVITY)
+        else
+          ! displacement vector
+          call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
+                          potential_acoustic, val_element,&
+                          hprime_xx,hprime_yy,hprime_zz, &
+                          xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                          ibool,rhostore,GRAVITY)
+        endif
       else
-        ! velocity vector
-        call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
-                          potential_dot_acoustic, veloc_element,&
+        if( SIMULATION_TYPE == 3 ) then
+          ! velocity vector for backward/reconstructed wavefield
+          call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
+                          b_potential_dot_acoustic, val_element,&
                           hprime_xx,hprime_yy,hprime_zz, &
                           xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                          ibool,rhostore)
+                          ibool,rhostore,GRAVITY)
+        else
+          ! velocity vector
+          call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
+                          potential_dot_acoustic, val_element,&
+                          hprime_xx,hprime_yy,hprime_zz, &
+                          xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                          ibool,rhostore,GRAVITY)
+        endif
       endif
 
       ! returns corresponding iglob velocity entry
@@ -892,7 +924,7 @@
         do j=1,NGLLY
           do i=1,NGLLX
             if( ibool(i,j,k,ispec) == iglob ) then
-              veloc_val(:) = veloc_element(:,i,j,k)
+              val_vector(:) = val_element(:,i,j,k)
               return
             endif
           enddo

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/initialize_simulation.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/initialize_simulation.f90	2011-11-16 22:35:56 UTC (rev 19211)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/initialize_simulation.f90	2011-11-17 01:58:40 UTC (rev 19212)
@@ -49,7 +49,7 @@
                         NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY)
 
   ! GPU_MODE is in par_file
-  call read_gpu_mode(GPU_MODE)
+  call read_gpu_mode(GPU_MODE,GRAVITY)
 
   ! get the base pathname for output files
   call get_value_string(OUTPUT_FILES, 'OUTPUT_FILES', OUTPUT_FILES_PATH(1:len_trim(OUTPUT_FILES_PATH)))
@@ -218,7 +218,10 @@
         stop 'GPU mode does not support N_SLS /= 3 yet'
     endif
   endif
+  if( .not. GPU_MODE .and. GRAVITY ) &
+    stop 'GRAVITY only supported in GPU mode'
 
+
   ! absorbing surfaces
   if( ABSORBING_CONDITIONS ) then
     ! for arbitrary orientation of elements, which face belongs to xmin,xmax,etc... -

Added: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/make_gravity.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/make_gravity.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/make_gravity.f90	2011-11-17 01:58:40 UTC (rev 19212)
@@ -0,0 +1,680 @@
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 0
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+!
+! United States and French Government Sponsorship Acknowledged.
+
+
+  subroutine make_gravity(nspl,rspl,gspl,gspl2, &
+                          ROCEAN,RMIDDLE_CRUST,RMOHO,R80,R220,R400,R600,R670, &
+                          R771,RTOPDDOUBLEPRIME,RCMB,RICB)
+
+! creates a spline for the gravity profile in PREM
+! radius and density are non-dimensional
+
+  implicit none
+
+  include "constants.h"
+
+  integer:: nspl
+  double precision:: rspl(NR),gspl(NR),gspl2(NR)
+  double precision ROCEAN,RMIDDLE_CRUST,RMOHO,R80,R220,R400,R600,R670, &
+                   R771,RTOPDDOUBLEPRIME,RCMB,RICB
+
+  ! local parameters
+  double precision r_icb,r_cmb,r_topddoubleprime,r_771,r_670,r_600
+  double precision r_400,r_220,r_80,r_moho,r_middle_crust,r_ocean,r_0
+  double precision r(NR),rho(NR),g(NR),i_rho
+  double precision s1(NR),s2(NR),s3(NR)
+  double precision yp1,ypn
+  integer i
+
+! PREM
+  ! values in (m)
+  ROCEAN = 6368000.d0
+  RMIDDLE_CRUST = 6356000.d0
+  RMOHO = 6346600.d0 ! PREM moho depth at 24.4 km
+  R80  = 6291000.d0
+  R220 = 6151000.d0
+  R400 = 5971000.d0
+  R600 = 5771000.d0
+  R670 = 5701000.d0
+  R771 = 5600000.d0
+  RTOPDDOUBLEPRIME = 3630000.d0
+  RCMB = 3480000.d0
+  RICB = 1221000.d0
+
+! non-dimensionalize
+  r_icb = RICB/R_EARTH_GRAVITY
+  r_cmb = RCMB/R_EARTH_GRAVITY
+  r_topddoubleprime = RTOPDDOUBLEPRIME/R_EARTH_GRAVITY
+  r_771 = R771/R_EARTH_GRAVITY
+  r_670 = R670/R_EARTH_GRAVITY
+  r_600 = R600/R_EARTH_GRAVITY
+  r_400 = R400/R_EARTH_GRAVITY
+  r_220 = R220/R_EARTH_GRAVITY
+  r_80 = R80/R_EARTH_GRAVITY
+  r_moho = RMOHO/R_EARTH_GRAVITY
+  r_middle_crust = RMIDDLE_CRUST/R_EARTH_GRAVITY
+  r_ocean = ROCEAN_GRAVITY/R_EARTH_GRAVITY
+  r_0 = 1.d0
+
+  do i=1,163
+    r(i) = r_icb*dble(i-1)/dble(162)
+  enddo
+  do i=164,323
+    r(i) = r_icb+(r_cmb-r_icb)*dble(i-164)/dble(159)
+  enddo
+  do i=324,336
+    r(i) = r_cmb+(r_topddoubleprime-r_cmb)*dble(i-324)/dble(12)
+  enddo
+  do i=337,517
+    r(i) = r_topddoubleprime+(r_771-r_topddoubleprime)*dble(i-337)/dble(180)
+  enddo
+  do i=518,530
+    r(i) = r_771+(r_670-r_771)*dble(i-518)/dble(12)
+  enddo
+  do i=531,540
+    r(i) = r_670+(r_600-r_670)*dble(i-531)/dble(9)
+  enddo
+  do i=541,565
+    r(i) = r_600+(r_400-r_600)*dble(i-541)/dble(24)
+  enddo
+  do i=566,590
+    r(i) = r_400+(r_220-r_400)*dble(i-566)/dble(24)
+  enddo
+  do i=591,609
+    r(i) = r_220+(r_80-r_220)*dble(i-591)/dble(18)
+  enddo
+  do i=610,619
+    r(i) = r_80+(r_moho-r_80)*dble(i-610)/dble(9)
+  enddo
+  do i=620,626
+    r(i) = r_moho+(r_middle_crust-r_moho)*dble(i-620)/dble(6)
+  enddo
+  do i=627,633
+    r(i) = r_middle_crust+(r_ocean-r_middle_crust)*dble(i-627)/dble(6)
+  enddo
+  do i=634,NR
+    r(i) = r_ocean+(r_0-r_ocean)*dble(i-634)/dble(6)
+  enddo
+
+! use PREM to get the density profile for ellipticity (fine for other 1D reference models)
+  do i=1,NR
+    call prem_density(r(i),rho(i), &
+                    RICB,RCMB,RTOPDDOUBLEPRIME, &
+                    R600,R670,R220,R771,R400,R80,RMOHO,RMIDDLE_CRUST,ROCEAN_GRAVITY)
+  enddo
+
+  g(1)=0.0d0
+  do i=2,NR
+    call intgrl(i_rho,r,1,i,rho,s1,s2,s3)
+    g(i)=4.0d0*i_rho/(r(i)*r(i))
+  enddo
+
+!
+! get ready to spline g
+!
+  nspl=1
+  rspl(1)=r(1)
+  gspl(1)=g(1)
+  do i=2,NR
+    if(r(i)/=r(i-1)) then
+      nspl=nspl+1
+      rspl(nspl)=r(i)
+      gspl(nspl)=g(i)
+    endif
+  enddo
+  yp1=(4.0d0/3.0d0)*rho(1)
+  ypn=4.0d0*rho(NR)-2.0d0*g(NR)/r(NR)
+  call spline_construction(rspl,gspl,nspl,yp1,ypn,gspl2)
+
+  end subroutine make_gravity
+
+
+!--------------------------------------------------------------------------------------------------
+!
+! PREM [Dziewonski and Anderson, 1981].
+!
+! A. M. Dziewonski and D. L. Anderson.
+! Preliminary reference Earth model.
+! Phys. Earth Planet. Inter., 25:297–356, 1981.
+!
+! Isotropic (iso) and transversely isotropic (aniso) version of the
+! spherically symmetric Preliminary Reference Earth Model
+!
+!--------------------------------------------------------------------------------------------------
+
+
+  subroutine model_prem_iso(x,rho,drhodr,vp,vs,Qkappa,Qmu,&
+                          RICB,RCMB,RTOPDDOUBLEPRIME, &
+                          R600,R670,R220,R771,R400,R80,RMOHO,RMIDDLE_CRUST,ROCEAN)
+
+  implicit none
+
+  include "constants.h"
+
+! given a normalized radius x, gives the non-dimensionalized density rho,
+! speeds vp and vs, and the quality factors Qkappa and Qmu
+
+  double precision x,rho,drhodr,vp,vs,Qkappa,Qmu,RICB,RCMB,RTOPDDOUBLEPRIME, &
+      R600,R670,R220,R771,R400,R80,RMOHO,RMIDDLE_CRUST,ROCEAN
+
+  double precision r,scaleval
+
+  logical,parameter :: ONE_CRUST = .false.
+  logical,parameter :: CRUSTAL = .false.
+
+! compute real physical radius in meters
+  r = x * R_EARTH
+
+!
+!--- inner core
+!
+  if(r >= 0.d0 .and. r <= RICB) then
+    drhodr=-2.0d0*8.8381d0*x
+    rho=13.0885d0-8.8381d0*x*x
+    vp=11.2622d0-6.3640d0*x*x
+    vs=3.6678d0-4.4475d0*x*x
+    Qmu=84.6d0
+    Qkappa=1327.7d0
+!
+!--- outer core
+!
+  else if(r > RICB .and. r <= RCMB) then
+    drhodr=-1.2638d0-2.0d0*3.6426d0*x-3.0d0*5.5281d0*x*x
+    rho=12.5815d0-1.2638d0*x-3.6426d0*x*x-5.5281d0*x*x*x
+    vp=11.0487d0-4.0362d0*x+4.8023d0*x*x-13.5732d0*x*x*x
+    vs=0.0d0
+    Qmu=0.0d0
+    Qkappa=57827.0d0
+!
+!--- D" at the base of the mantle
+!
+  else if(r > RCMB .and. r <= RTOPDDOUBLEPRIME) then
+    drhodr=-6.4761d0+2.0d0*5.5283d0*x-3.0d0*3.0807d0*x*x
+    rho=7.9565d0-6.4761d0*x+5.5283d0*x*x-3.0807d0*x*x*x
+    vp=15.3891d0-5.3181d0*x+5.5242d0*x*x-2.5514d0*x*x*x
+    vs=6.9254d0+1.4672d0*x-2.0834d0*x*x+0.9783d0*x*x*x
+    Qmu=312.0d0
+    Qkappa=57827.0d0
+!
+!--- mantle: from top of D" to d670
+!
+  else if(r > RTOPDDOUBLEPRIME .and. r <= R771) then
+    drhodr=-6.4761d0+2.0d0*5.5283d0*x-3.0d0*3.0807d0*x*x
+    rho=7.9565d0-6.4761d0*x+5.5283d0*x*x-3.0807d0*x*x*x
+    vp=24.9520d0-40.4673d0*x+51.4832d0*x*x-26.6419d0*x*x*x
+    vs=11.1671d0-13.7818d0*x+17.4575d0*x*x-9.2777d0*x*x*x
+    Qmu=312.0d0
+    Qkappa=57827.0d0
+  else if(r > R771 .and. r <= R670) then
+    drhodr=-6.4761d0+2.0d0*5.5283d0*x-3.0d0*3.0807d0*x*x
+    rho=7.9565d0-6.4761d0*x+5.5283d0*x*x-3.0807d0*x*x*x
+    vp=29.2766d0-23.6027d0*x+5.5242d0*x*x-2.5514d0*x*x*x
+    vs=22.3459d0-17.2473d0*x-2.0834d0*x*x+0.9783d0*x*x*x
+    Qmu=312.0d0
+    Qkappa=57827.0d0
+!
+!--- mantle: above d670
+!
+  else if(r > R670 .and. r <= R600) then
+    drhodr=-1.4836d0
+    rho=5.3197d0-1.4836d0*x
+    vp=19.0957d0-9.8672d0*x
+    vs=9.9839d0-4.9324d0*x
+    Qmu=143.0d0
+    Qkappa=57827.0d0
+  else if(r > R600 .and. r <= R400) then
+    drhodr=-8.0298d0
+    rho=11.2494d0-8.0298d0*x
+    vp=39.7027d0-32.6166d0*x
+    vs=22.3512d0-18.5856d0*x
+    Qmu=143.0d0
+    Qkappa=57827.0d0
+  else if(r > R400 .and. r <= R220) then
+    drhodr=-3.8045d0
+    rho=7.1089d0-3.8045d0*x
+    vp=20.3926d0-12.2569d0*x
+    vs=8.9496d0-4.4597d0*x
+    Qmu=143.0d0
+    Qkappa=57827.0d0
+  else if(r > R220 .and. r <= R80) then
+    drhodr=0.6924d0
+    rho=2.6910d0+0.6924d0*x
+    vp=4.1875d0+3.9382d0*x
+    vs=2.1519d0+2.3481d0*x
+    Qmu=80.0d0
+    Qkappa=57827.0d0
+  else
+    if(CRUSTAL) then
+    ! fill with PREM mantle and later add CRUST2.0
+      if(r > R80) then
+        ! density/velocity from mantle just below moho
+        drhodr=0.6924d0
+        rho=2.6910d0+0.6924d0*x
+        vp=4.1875d0+3.9382d0*x
+        vs=2.1519d0+2.3481d0*x
+        ! shear attenuation for R80 to surface
+        Qmu=600.0d0
+        Qkappa=57827.0d0
+      endif
+    else
+    ! use PREM crust
+      if(r > R80 .and. r <= RMOHO) then
+        drhodr=0.6924d0
+        rho=2.6910d0+0.6924d0*x
+        vp=4.1875d0+3.9382d0*x
+        vs=2.1519d0+2.3481d0*x
+        Qmu=600.0d0
+        Qkappa=57827.0d0
+
+      else if(r > RMOHO .and. r <= RMIDDLE_CRUST) then
+        drhodr=0.0d0
+        rho=2.9d0
+        vp=6.8d0
+        vs=3.9d0
+        Qmu=600.0d0
+        Qkappa=57827.0d0
+
+    ! same properties everywhere in PREM crust if we decide to define only one layer in the crust
+        if(ONE_CRUST) then
+          drhodr=0.0d0
+          rho=2.6d0
+          vp=5.8d0
+          vs=3.2d0
+          Qmu=600.0d0
+          Qkappa=57827.0d0
+        endif
+
+      else if(r > RMIDDLE_CRUST .and. r <= ROCEAN) then
+        drhodr=0.0d0
+        rho=2.6d0
+        vp=5.8d0
+        vs=3.2d0
+        Qmu=600.0d0
+        Qkappa=57827.0d0
+    ! for density profile for gravity, we do not check that r <= R_EARTH
+      else if(r > ROCEAN) then
+        drhodr=0.0d0
+        rho=2.6d0
+        vp=5.8d0
+        vs=3.2d0
+        Qmu=600.0d0
+        Qkappa=57827.0d0
+
+      endif
+    endif
+  endif
+
+! non-dimensionalize
+! time scaling (s^{-1}) is done with scaleval
+  scaleval=dsqrt(PI*GRAV*RHOAV)
+  drhodr=drhodr*1000.0d0/RHOAV
+  rho=rho*1000.0d0/RHOAV
+  vp=vp*1000.0d0/(R_EARTH*scaleval)
+  vs=vs*1000.0d0/(R_EARTH*scaleval)
+
+  end subroutine model_prem_iso
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine prem_density(x,rho, &
+                         RICB,RCMB,RTOPDDOUBLEPRIME, &
+                         R600,R670,R220,R771,R400,R80,RMOHO,RMIDDLE_CRUST,ROCEAN)
+
+  implicit none
+
+  include "constants.h"
+
+  double precision x,rho,RICB,RCMB,RTOPDDOUBLEPRIME, &
+      R600,R670,R220,R771,R400,R80,RMOHO,RMIDDLE_CRUST,ROCEAN
+  double precision r
+
+  logical,parameter :: ONE_CRUST = .false.
+
+  ! compute real physical radius in meters
+  r = x * R_EARTH
+
+  ! calculates density according to radius
+  if(r <= RICB) then
+    rho=13.0885d0-8.8381d0*x*x
+  else if(r > RICB .and. r <= RCMB) then
+    rho=12.5815d0-1.2638d0*x-3.6426d0*x*x-5.5281d0*x*x*x
+  else if(r > RCMB .and. r <= RTOPDDOUBLEPRIME) then
+    rho=7.9565d0-6.4761d0*x+5.5283d0*x*x-3.0807d0*x*x*x
+  else if(r > RTOPDDOUBLEPRIME .and. r <= R771) then
+    rho=7.9565d0-6.4761d0*x+5.5283d0*x*x-3.0807d0*x*x*x
+  else if(r > R771 .and. r <= R670) then
+    rho=7.9565d0-6.4761d0*x+5.5283d0*x*x-3.0807d0*x*x*x
+  else if(r > R670 .and. r <= R600) then
+    rho=5.3197d0-1.4836d0*x
+  else if(r > R600 .and. r <= R400) then
+    rho=11.2494d0-8.0298d0*x
+  else if(r > R400 .and. r <= R220) then
+    rho=7.1089d0-3.8045d0*x
+  else if(r > R220 .and. r <= R80) then
+    rho=2.6910d0+0.6924d0*x
+  else
+    if(r > R80 .and. r <= RMOHO) then
+      rho=2.6910d0+0.6924d0*x
+    else if(r > RMOHO .and. r <= RMIDDLE_CRUST) then
+      if(ONE_CRUST) then
+        rho=2.6d0
+      else
+        rho=2.9d0
+      endif
+    else if(r > RMIDDLE_CRUST .and. r <= ROCEAN) then
+      rho=2.6d0
+    else if(r > ROCEAN) then
+      rho=2.6d0
+    endif
+  endif
+
+  rho=rho*1000.0d0/RHOAV
+
+  end subroutine prem_density
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine intgrl(sum,r,nir,ner,f,s1,s2,s3)
+
+! Computes the integral of f[i]*r[i]*r[i] from i=nir to i=ner for
+! radii values as in model PREM_an640
+
+  implicit none
+
+! Argument variables
+  integer ner,nir
+  double precision f(640),r(640),s1(640),s2(640)
+  double precision s3(640),sum
+
+! Local variables
+  double precision, parameter :: third = 1.0d0/3.0d0
+  double precision, parameter :: fifth = 1.0d0/5.0d0
+  double precision, parameter :: sixth = 1.0d0/6.0d0
+
+  double precision rji,yprime(640)
+  double precision s1l,s2l,s3l
+
+  integer i,j,n,kdis(28)
+  integer ndis,nir1
+
+  data kdis/163,323,336,517,530,540,565,590,609,619,626,633,16*0/
+
+  ndis = 12
+  n = 640
+
+  call deriv(f,yprime,n,r,ndis,kdis,s1,s2,s3)
+  nir1 = nir + 1
+  sum = 0.0d0
+  do i=nir1,ner
+    j = i-1
+    rji = r(i) - r(j)
+    s1l = s1(j)
+    s2l = s2(j)
+    s3l = s3(j)
+    sum = sum + r(j)*r(j)*rji*(f(j) &
+              + rji*(0.5d0*s1l + rji*(third*s2l + rji*0.25d0*s3l))) &
+              + 2.0d0*r(j)*rji*rji*(0.5d0*f(j) + rji*(third*s1l + rji*(0.25d0*s2l + rji*fifth*s3l))) &
+              + rji*rji*rji*(third*f(j) + rji*(0.25d0*s1l + rji*(fifth*s2l + rji*sixth*s3l)))
+  enddo
+
+  end subroutine intgrl
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine deriv(y,yprime,n,r,ndis,kdis,s1,s2,s3)
+
+  implicit none
+
+! Argument variables
+  integer kdis(28),n,ndis
+  double precision r(n),s1(n),s2(n),s3(n)
+  double precision y(n),yprime(n)
+
+! Local variables
+  integer i,j,j1,j2
+  integer k,nd,ndp
+  double precision a0,b0,b1
+  double precision f(3,1000),h,h2,h2a
+  double precision h2b,h3a,ha,s13
+  double precision s21,s32,yy(3)
+
+  yy(1) = 0.d0
+  yy(2) = 0.d0
+  yy(3) = 0.d0
+
+  ndp=ndis+1
+  do 3 nd=1,ndp
+  if(nd == 1) goto 4
+  if(nd == ndp) goto 5
+  j1=kdis(nd-1)+1
+  j2=kdis(nd)-2
+  goto 6
+    4 j1=1
+  j2=kdis(1)-2
+  goto 6
+    5 j1=kdis(ndis)+1
+  j2=n-2
+    6 if((j2+1-j1)>0) goto 11
+  j2=j2+2
+  yy(1)=(y(j2)-y(j1))/(r(j2)-r(j1))
+  s1(j1)=yy(1)
+  s1(j2)=yy(1)
+  s2(j1)=yy(2)
+  s2(j2)=yy(2)
+  s3(j1)=yy(3)
+  s3(j2)=yy(3)
+  goto 3
+   11 a0=0.0d0
+  if(j1 == 1) goto 7
+  h=r(j1+1)-r(j1)
+  h2=r(j1+2)-r(j1)
+  yy(1)=h*h2*(h2-h)
+  h=h*h
+  h2=h2*h2
+  b0=(y(j1)*(h-h2)+y(j1+1)*h2-y(j1+2)*h)/yy(1)
+  goto 8
+ 7 b0=0.0d0
+ 8 b1=b0
+
+  if(j2 > 1000) stop 'error in subroutine deriv for j2'
+
+  do i=j1,j2
+    h=r(i+1)-r(i)
+    yy(1)=y(i+1)-y(i)
+    h2=h*h
+    ha=h-a0
+    h2a=h-2.0d0*a0
+    h3a=2.0d0*h-3.0d0*a0
+    h2b=h2*b0
+    s1(i)=h2/ha
+    s2(i)=-ha/(h2a*h2)
+    s3(i)=-h*h2a/h3a
+    f(1,i)=(yy(1)-h*b0)/(h*ha)
+    f(2,i)=(h2b-yy(1)*(2.0d0*h-a0))/(h*h2*h2a)
+    f(3,i)=-(h2b-3.0d0*yy(1)*ha)/(h*h3a)
+    a0=s3(i)
+    b0=f(3,i)
+  enddo
+
+  i=j2+1
+  h=r(i+1)-r(i)
+  yy(1)=y(i+1)-y(i)
+  h2=h*h
+  ha=h-a0
+  h2a=h*ha
+  h2b=h2*b0-yy(1)*(2.d0*h-a0)
+  s1(i)=h2/ha
+  f(1,i)=(yy(1)-h*b0)/h2a
+  ha=r(j2)-r(i+1)
+  yy(1)=-h*ha*(ha+h)
+  ha=ha*ha
+  yy(1)=(y(i+1)*(h2-ha)+y(i)*ha-y(j2)*h2)/yy(1)
+  s3(i)=(yy(1)*h2a+h2b)/(h*h2*(h-2.0d0*a0))
+  s13=s1(i)*s3(i)
+  s2(i)=f(1,i)-s13
+
+  do j=j1,j2
+    k=i-1
+    s32=s3(k)*s2(i)
+    s1(i)=f(3,k)-s32
+    s21=s2(k)*s1(i)
+    s3(k)=f(2,k)-s21
+    s13=s1(k)*s3(k)
+    s2(k)=f(1,k)-s13
+    i=k
+  enddo
+
+  s1(i)=b1
+  j2=j2+2
+  s1(j2)=yy(1)
+  s2(j2)=yy(2)
+  s3(j2)=yy(3)
+ 3 continue
+
+  do i=1,n
+    yprime(i)=s1(i)
+  enddo
+
+  end subroutine deriv
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+! compute spline coefficients
+
+  subroutine spline_construction(xpoint,ypoint,npoint,tangent_first_point,tangent_last_point,spline_coefficients)
+
+  implicit none
+
+! tangent to the spline imposed at the first and last points
+  double precision, intent(in) :: tangent_first_point,tangent_last_point
+
+! number of input points and coordinates of the input points
+  integer, intent(in) :: npoint
+  double precision, dimension(npoint), intent(in) :: xpoint,ypoint
+
+! spline coefficients output by the routine
+  double precision, dimension(npoint), intent(out) :: spline_coefficients
+
+  integer :: i
+
+  double precision, dimension(:), allocatable :: temporary_array
+
+  allocate(temporary_array(npoint))
+
+  spline_coefficients(1) = - 1.d0 / 2.d0
+
+  temporary_array(1) = (3.d0/(xpoint(2)-xpoint(1)))*((ypoint(2)-ypoint(1))/(xpoint(2)-xpoint(1))-tangent_first_point)
+
+  do i = 2,npoint-1
+
+    spline_coefficients(i) = ((xpoint(i)-xpoint(i-1))/(xpoint(i+1)-xpoint(i-1))-1.d0) &
+       / ((xpoint(i)-xpoint(i-1))/(xpoint(i+1)-xpoint(i-1))*spline_coefficients(i-1)+2.d0)
+
+    temporary_array(i) = (6.d0*((ypoint(i+1)-ypoint(i))/(xpoint(i+1)-xpoint(i)) &
+       - (ypoint(i)-ypoint(i-1))/(xpoint(i)-xpoint(i-1)))/(xpoint(i+1)-xpoint(i-1)) &
+       - (xpoint(i)-xpoint(i-1))/(xpoint(i+1)-xpoint(i-1))*temporary_array(i-1)) &
+       / ((xpoint(i)-xpoint(i-1))/(xpoint(i+1)-xpoint(i-1))*spline_coefficients(i-1)+2.d0)
+
+  enddo
+
+  spline_coefficients(npoint) = ((3.d0/(xpoint(npoint)-xpoint(npoint-1))) &
+      * (tangent_last_point-(ypoint(npoint)-ypoint(npoint-1))/(xpoint(npoint)-xpoint(npoint-1))) &
+      - 1.d0/2.d0*temporary_array(npoint-1))/(1.d0/2.d0*spline_coefficients(npoint-1)+1.d0)
+
+  do i = npoint-1,1,-1
+    spline_coefficients(i) = spline_coefficients(i)*spline_coefficients(i+1) + temporary_array(i)
+  enddo
+
+  deallocate(temporary_array)
+
+  end subroutine spline_construction
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+! evaluate a spline
+
+  subroutine spline_evaluation(xpoint,ypoint,spline_coefficients,npoint,x_evaluate_spline,y_spline_obtained)
+
+  implicit none
+
+! number of input points and coordinates of the input points
+  integer, intent(in) :: npoint
+  double precision, dimension(npoint), intent(in) :: xpoint,ypoint
+
+! spline coefficients to use
+  double precision, dimension(npoint), intent(in) :: spline_coefficients
+
+! abscissa at which we need to evaluate the value of the spline
+  double precision, intent(in):: x_evaluate_spline
+
+! ordinate evaluated by the routine for the spline at this abscissa
+  double precision, intent(out):: y_spline_obtained
+
+  integer :: index_loop,index_lower,index_higher
+
+  double precision :: coef1,coef2
+
+! initialize to the whole interval
+  index_lower = 1
+  index_higher = npoint
+
+! determine the right interval to use, by dichotomy
+  do while (index_higher - index_lower > 1)
+! compute the middle of the interval
+    index_loop = (index_higher + index_lower) / 2
+    if(xpoint(index_loop) > x_evaluate_spline) then
+      index_higher = index_loop
+    else
+      index_lower = index_loop
+    endif
+  enddo
+
+! test that the interval obtained does not have a size of zero
+! (this could happen for instance in the case of duplicates in the input list of points)
+  if(xpoint(index_higher) == xpoint(index_lower)) stop 'incorrect interval found in spline evaluation'
+
+  coef1 = (xpoint(index_higher) - x_evaluate_spline) / (xpoint(index_higher) - xpoint(index_lower))
+  coef2 = (x_evaluate_spline - xpoint(index_lower)) / (xpoint(index_higher) - xpoint(index_lower))
+
+  y_spline_obtained = coef1*ypoint(index_lower) + coef2*ypoint(index_higher) + &
+        ((coef1**3 - coef1)*spline_coefficients(index_lower) + &
+         (coef2**3 - coef2)*spline_coefficients(index_higher))*((xpoint(index_higher) - xpoint(index_lower))**2)/6.d0
+
+  end subroutine spline_evaluation
+
+

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90	2011-11-16 22:35:56 UTC (rev 19211)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/prepare_timerun.f90	2011-11-17 01:58:40 UTC (rev 19212)
@@ -76,6 +76,13 @@
     endif
 
     write(IMAIN,*)
+    if(GRAVITY) then
+      write(IMAIN,*) 'incorporating gravity'
+    else
+      write(IMAIN,*) 'no gravity'
+    endif
+
+    write(IMAIN,*)
     if(ACOUSTIC_SIMULATION) then
       write(IMAIN,*) 'incorporating acoustic simulation'
     else
@@ -164,6 +171,9 @@
   ! prepares attenuation arrays
   call prepare_timerun_attenuation()
 
+  ! prepares gravity arrays
+  call prepare_timerun_gravity()
+
   ! initializes PML arrays
   if( ABSORBING_CONDITIONS  ) then
     if (SIMULATION_TYPE /= 1 .and. ABSORB_USE_PML )  then
@@ -405,7 +415,99 @@
 
   end subroutine prepare_timerun_attenuation
 
+!
+!-------------------------------------------------------------------------------------------------
+!
 
+  subroutine prepare_timerun_gravity()
+
+! precomputes gravity factors
+
+  use specfem_par
+  use specfem_par_acoustic
+  use specfem_par_elastic
+  use specfem_par_poroelastic
+  implicit none
+
+  ! local parameters
+  double precision RICB,RCMB,RTOPDDOUBLEPRIME, &
+    R80,R220,R400,R600,R670,R771,RMOHO,RMIDDLE_CRUST,ROCEAN
+  double precision :: rspl_gravity(NR),gspl(NR),gspl2(NR)
+  double precision :: radius,g,dg ! radius_km
+  !double precision :: g_cmb_dble,g_icb_dble
+  double precision :: rho,drhodr,vp,vs,Qkappa,Qmu
+  integer :: nspl_gravity !int_radius
+  integer :: i,j,k,iglob,ier
+
+  ! sets up weights needed for integration of gravity
+  do k=1,NGLLZ
+    do j=1,NGLLY
+      do i=1,NGLLX
+        wgll_cube(i,j,k) = sngl( wxgll(i)*wygll(j)*wzgll(k) )
+      enddo
+    enddo
+  enddo
+
+  ! store g, rho and dg/dr=dg using normalized radius in lookup table every 100 m
+  ! get density and velocity from PREM model using dummy doubling flag
+  ! this assumes that the gravity perturbations are small and smooth
+  ! and that we can neglect the 3D model and use PREM every 100 m in all cases
+  ! this is probably a rather reasonable assumption
+  if(GRAVITY) then
+
+    ! allocates gravity arrays
+    allocate( minus_deriv_gravity(NGLOB_AB), &
+             minus_g(NGLOB_AB), stat=ier)
+    if( ier /= 0 ) stop 'error allocating gravity arrays'
+
+    ! sets up spline table
+    call make_gravity(nspl_gravity,rspl_gravity,gspl,gspl2, &
+                          ROCEAN,RMIDDLE_CRUST,RMOHO,R80,R220,R400,R600,R670, &
+                          R771,RTOPDDOUBLEPRIME,RCMB,RICB)
+
+    ! pre-calculates gravity terms for all global points
+    do iglob = 1,NGLOB_AB
+
+      ! normalized radius ( zstore values given in m, negative values for depth)
+      radius = ( R_EARTH + zstore(iglob) ) / R_EARTH
+      call spline_evaluation(rspl_gravity,gspl,gspl2,nspl_gravity,radius,g)
+
+      ! use PREM density profile to calculate gravity (fine for other 1D models)
+      call model_prem_iso(radius,rho,drhodr,vp,vs,Qkappa,Qmu, &
+                        RICB,RCMB,RTOPDDOUBLEPRIME, &
+                        R600,R670,R220,R771,R400,R80,RMOHO,RMIDDLE_CRUST,ROCEAN)
+
+      ! re-dimensionalize
+      radius = radius * R_EARTH ! in m
+      vp = vp * R_EARTH*dsqrt(PI*GRAV*RHOAV)  ! in m / s
+      rho = rho  * RHOAV  ! in kg / m^3
+      g = g * R_EARTH*(PI*GRAV*RHOAV) ! in m / s^2 ( should be around 10 m/s^2 )
+
+      dg = 4.0d0*rho - 2.0d0*g/radius
+
+      minus_deriv_gravity(iglob) = - dg
+      minus_g(iglob) = - g ! in negative z-direction - g ! / vp**2
+
+      ! debug
+      !if( iglob == 1 .or. iglob == 1000 .or. iglob == 10000 ) then
+      !  print*,'gravity: radius=',radius,'g=',g,'depth=',radius-R_EARTH
+      !  print*,'vp=',vp,'rho=',rho,'kappa=',(vp**2) * rho
+      !  print*,'minus_g..=',minus_g(iglob)
+      !endif
+    enddo
+
+  else
+
+    ! allocates dummy gravity arrays
+    allocate( minus_deriv_gravity(0), &
+             minus_g(0), stat=ier)
+    if( ier /= 0 ) stop 'error allocating gravity arrays'
+
+  endif
+
+  end subroutine prepare_timerun_gravity
+
+
 !
 !-------------------------------------------------------------------------------------------------
 !
@@ -872,6 +974,11 @@
 
   endif ! NOISE_TOMOGRAPHY
 
+  ! prepares gravity arrays
+  call prepare_fields_gravity_device(Mesh_pointer,GRAVITY, &
+                                    minus_deriv_gravity,minus_g,wgll_cube,&
+                                    ACOUSTIC_SIMULATION,rhostore)
+
   ! sends initial data to device
 
   ! puts acoustic initial fields onto GPU

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D_par.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D_par.f90	2011-11-16 22:35:56 UTC (rev 19211)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/specfem3D_par.f90	2011-11-17 01:58:40 UTC (rev 19212)
@@ -159,7 +159,8 @@
   double precision :: DT
 
   logical :: ATTENUATION,USE_OLSEN_ATTENUATION, &
-            OCEANS,TOPOGRAPHY,ABSORBING_CONDITIONS,ANISOTROPY
+            OCEANS,TOPOGRAPHY,ABSORBING_CONDITIONS,ANISOTROPY, &
+            GRAVITY
 
   logical :: SAVE_FORWARD,SAVE_MESH_FILES,PRINT_SOURCE_TIME_FUNCTION
 
@@ -207,7 +208,11 @@
 !!$  real(kind=CUSTOM_REAL) :: weight, jacobianl
 !!!! NL NL REGOLITH
 
+  ! gravity
+  real(kind=CUSTOM_REAL), dimension(:),allocatable :: minus_deriv_gravity,minus_g
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: wgll_cube
 
+
 ! ADJOINT parameters
 
   ! time scheme

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_movie_output.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_movie_output.f90	2011-11-16 22:35:56 UTC (rev 19211)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_movie_output.f90	2011-11-17 01:58:40 UTC (rev 19212)
@@ -150,19 +150,19 @@
                           potential_acoustic, displ_element,&
                           hprime_xx,hprime_yy,hprime_zz, &
                           xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                          ibool,rhostore)
+                          ibool,rhostore,GRAVITY)
       ! velocity vector
       call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
                           potential_dot_acoustic, veloc_element,&
                           hprime_xx,hprime_yy,hprime_zz, &
                           xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                          ibool,rhostore)
+                          ibool,rhostore,GRAVITY)
       ! accel ?
       call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
                           potential_dot_dot_acoustic, accel_element,&
                           hprime_xx,hprime_yy,hprime_zz, &
                           xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                          ibool,rhostore)
+                          ibool,rhostore,GRAVITY)
     endif
 
 
@@ -353,12 +353,12 @@
   use specfem_par_movie
   implicit none
 
-  real(kind=CUSTOM_REAL),dimension(:,:,:,:),allocatable:: veloc_element
+  real(kind=CUSTOM_REAL),dimension(:,:,:,:),allocatable:: val_element
   real(kind=CUSTOM_REAL),dimension(1):: dummy
   integer :: ispec2D,ispec,ipoin,iglob,ier
 
   ! allocate array for single elements
-  allocate( veloc_element(NDIM,NGLLX,NGLLY,NGLLZ),stat=ier)
+  allocate( val_element(NDIM,NGLLX,NGLLY,NGLLZ),stat=ier)
   if( ier /= 0 ) stop 'error allocating arrays for movie elements'
 
 ! initializes arrays for point coordinates
@@ -389,51 +389,40 @@
     ispec = faces_surface_ext_mesh_ispec(ispec2D)
 
     if( ispec_is_acoustic(ispec) ) then
-      ! velocity vector
-      call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
-                          potential_dot_acoustic, veloc_element,&
+      if(SAVE_DISPLACEMENT) then
+        ! displacement vector
+        call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
+                          potential_acoustic, val_element,&
                           hprime_xx,hprime_yy,hprime_zz, &
                           xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                          ibool,rhostore)
+                          ibool,rhostore,GRAVITY)
+      else
+        ! velocity vector
+        call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
+                          potential_dot_acoustic, val_element,&
+                          hprime_xx,hprime_yy,hprime_zz, &
+                          xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                          ibool,rhostore,GRAVITY)
+      endif
     endif
 
     if (USE_HIGHRES_FOR_MOVIES) then
+      ! all GLL points on a face
       do ipoin = 1, NGLLX*NGLLY
         iglob = faces_surface_ext_mesh(ipoin,ispec2D)
-        ! saves velocity vector
-        if( ispec_is_elastic(ispec) ) then
-          ! velocity x,y,z-components
-          store_val_ux_external_mesh(NGLLX*NGLLY*(ispec2D-1)+ipoin) = veloc(1,iglob)
-          store_val_uy_external_mesh(NGLLX*NGLLY*(ispec2D-1)+ipoin) = veloc(2,iglob)
-          store_val_uz_external_mesh(NGLLX*NGLLY*(ispec2D-1)+ipoin) = veloc(3,iglob)
-        endif
-
-        ! acoustic pressure potential
-        if( ispec_is_acoustic(ispec) ) then
-          ! puts velocity values into storage array
-          call wmo_get_vel_vector(ispec,ispec2D,ipoin, &
-                                veloc_element, &
+        ! puts displ/velocity values into storage array
+        call wmo_get_vel_vector(ispec,ispec2D,ipoin,iglob, &
+                                val_element, &
                                 NGLLX*NGLLY)
-        endif
       enddo
     else
+      ! only corner points
       do ipoin = 1, 4
         iglob = faces_surface_ext_mesh(ipoin,ispec2D)
-        ! saves velocity vector
-        if( ispec_is_elastic(ispec) ) then
-          ! velocity x,y,z-components
-          store_val_ux_external_mesh(NGNOD2D*(ispec2D-1)+ipoin) = veloc(1,iglob)
-          store_val_uy_external_mesh(NGNOD2D*(ispec2D-1)+ipoin) = veloc(2,iglob)
-          store_val_uz_external_mesh(NGNOD2D*(ispec2D-1)+ipoin) = veloc(3,iglob)
-        endif
-
-        ! acoustic pressure potential
-        if( ispec_is_acoustic(ispec) ) then
-          ! puts velocity values into storage array
-          call wmo_get_vel_vector(ispec,ispec2D,ipoin, &
-                                veloc_element, &
+        ! puts displ/velocity values into storage array
+        call wmo_get_vel_vector(ispec,ispec2D,ipoin,iglob, &
+                                val_element, &
                                 NGNOD2D)
-        endif
       enddo
     endif
   enddo
@@ -504,45 +493,65 @@
     close(IOUT)
   endif
 
-  deallocate(veloc_element)
+  deallocate(val_element)
 
   end subroutine wmo_create_movie_surface_em
 
 !================================================================
 
-  subroutine wmo_get_vel_vector(ispec,ispec2D,ipoin, &
-                                veloc_element, &
+  subroutine wmo_get_vel_vector(ispec,ispec2D, &
+                                ipoin,iglob, &
+                                val_element, &
                                 narraydim)
 
   ! put into this separate routine to make compilation faster
 
   use specfem_par,only: NDIM,ibool
+  use specfem_par_elastic,only: displ,veloc,ispec_is_elastic
+  use specfem_par_acoustic,only: ispec_is_acoustic
   use specfem_par_movie
   implicit none
 
-  integer :: ispec,ispec2D,ipoin,narraydim
-  real(kind=CUSTOM_REAL),dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: &
-    veloc_element
+  integer,intent(in) :: ispec,ispec2D,ipoin,iglob
+  integer,intent(in) :: narraydim
+  real(kind=CUSTOM_REAL),dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: val_element
 
   ! local parameters
-  integer :: i,j,k,iglob
+  integer :: i,j,k
   logical :: is_done
 
-  ! velocity vector
-  is_done = .false.
-  do k=1,NGLLZ
-    do j=1,NGLLY
-      do i=1,NGLLX
-        if( iglob == ibool(i,j,k,ispec) ) then
-          store_val_ux_external_mesh(narraydim*(ispec2D-1)+ipoin) = veloc_element(1,i,j,k)
-          store_val_uy_external_mesh(narraydim*(ispec2D-1)+ipoin) = veloc_element(2,i,j,k)
-          store_val_uz_external_mesh(narraydim*(ispec2D-1)+ipoin) = veloc_element(3,i,j,k)
-          is_done = .true.
-          return
-        endif
+  ! elastic displacement/velocity
+  if( ispec_is_elastic(ispec) ) then
+    if(SAVE_DISPLACEMENT) then
+      ! velocity x,y,z-components
+      store_val_ux_external_mesh(narraydim*(ispec2D-1)+ipoin) = displ(1,iglob)
+      store_val_uy_external_mesh(narraydim*(ispec2D-1)+ipoin) = displ(2,iglob)
+      store_val_uz_external_mesh(narraydim*(ispec2D-1)+ipoin) = displ(3,iglob)
+    else
+      ! velocity x,y,z-components
+      store_val_ux_external_mesh(narraydim*(ispec2D-1)+ipoin) = veloc(1,iglob)
+      store_val_uy_external_mesh(narraydim*(ispec2D-1)+ipoin) = veloc(2,iglob)
+      store_val_uz_external_mesh(narraydim*(ispec2D-1)+ipoin) = veloc(3,iglob)
+    endif
+  endif
+
+  ! acoustic pressure potential
+  if( ispec_is_acoustic(ispec) ) then
+    is_done = .false.
+    do k=1,NGLLZ
+      do j=1,NGLLY
+        do i=1,NGLLX
+          if( iglob == ibool(i,j,k,ispec) ) then
+            store_val_ux_external_mesh(narraydim*(ispec2D-1)+ipoin) = val_element(1,i,j,k)
+            store_val_uy_external_mesh(narraydim*(ispec2D-1)+ipoin) = val_element(2,i,j,k)
+            store_val_uz_external_mesh(narraydim*(ispec2D-1)+ipoin) = val_element(3,i,j,k)
+            is_done = .true.
+            return
+          endif
+        enddo
       enddo
     enddo
-  enddo
+  endif
 
   end subroutine wmo_get_vel_vector
 
@@ -625,14 +634,14 @@
                           potential_acoustic, val_element,&
                           hprime_xx,hprime_yy,hprime_zz, &
                           xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                          ibool,rhostore)
+                          ibool,rhostore,GRAVITY)
       else
         ! velocity vector
         call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
                           potential_dot_acoustic, val_element,&
                           hprime_xx,hprime_yy,hprime_zz, &
                           xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                          ibool,rhostore)
+                          ibool,rhostore,GRAVITY)
       endif
     endif
 
@@ -645,25 +654,12 @@
         j = free_surface_ijk(2,igll,iface)
         k = free_surface_ijk(3,igll,iface)
         iglob = ibool(i,j,k,ispec)
-        ! elastic displacement/velocity
-        if( ispec_is_elastic(ispec) ) then
-          if(SAVE_DISPLACEMENT) then
-             store_val_ux_external_mesh(ipoin) = displ(1,iglob)
-             store_val_uy_external_mesh(ipoin) = displ(2,iglob)
-             store_val_uz_external_mesh(ipoin) = displ(3,iglob)
-          else
-             store_val_ux_external_mesh(ipoin) = veloc(1,iglob)
-             store_val_uy_external_mesh(ipoin) = veloc(2,iglob)
-             store_val_uz_external_mesh(ipoin) = veloc(3,iglob)
-          endif
-        endif
 
-        ! acoustic pressure potential
-        if( ispec_is_acoustic(ispec) ) then
-          ! stores values from element
-          call wmo_get_val_elem(ispec,ipoin,val_element)
-        endif
-
+        ! puts displ/velocity values into storage array
+        call wmo_get_vel_vector(ispec,0, &
+                               ipoin,iglob, &
+                               val_element, &
+                               0)
       enddo
     else
       imin = minval( free_surface_ijk(1,:,iface) )
@@ -683,25 +679,11 @@
           iglob = ibool(iorderi(iloc),iorderj(iloc),kmin,ispec)
         endif
 
-        ! elastic displacement/velocity
-        if( ispec_is_elastic(ispec) ) then
-          if(SAVE_DISPLACEMENT) then
-             store_val_ux_external_mesh(ipoin) = displ(1,iglob)
-             store_val_uy_external_mesh(ipoin) = displ(2,iglob)
-             store_val_uz_external_mesh(ipoin) = displ(3,iglob)
-          else
-             store_val_ux_external_mesh(ipoin) = veloc(1,iglob)
-             store_val_uy_external_mesh(ipoin) = veloc(2,iglob)
-             store_val_uz_external_mesh(ipoin) = veloc(3,iglob)
-          endif
-        endif
-
-        ! acoustic pressure potential
-        if( ispec_is_acoustic(ispec) ) then
-          ! stores values from element
-          call wmo_get_val_elem(ispec,ipoin,val_element)
-        endif
-
+        ! puts displ/velocity values into storage array
+        call wmo_get_vel_vector(ispec,0, &
+                               ipoin,iglob, &
+                               val_element, &
+                               0)
       enddo ! iloc
     endif
   enddo ! iface
@@ -773,42 +755,6 @@
 
   end subroutine wmo_movie_surface_output_o
 
-!================================================================
-
-  subroutine wmo_get_val_elem(ispec,ipoin,val_element)
-
-  ! put into this separate routine to make compilation faster
-
-  use specfem_par,only: NDIM,ibool
-  use specfem_par_movie
-  implicit none
-
-  integer :: ispec,ipoin
-  real(kind=CUSTOM_REAL),dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: &
-    val_element
-
-  ! local parameters
-  integer :: i,j,k,iglob
-  logical :: is_done
-
-  ! velocity vector
-  is_done = .false.
-  do k=1,NGLLZ
-    do j=1,NGLLY
-      do i=1,NGLLX
-        if( iglob == ibool(i,j,k,ispec) ) then
-          store_val_ux_external_mesh(ipoin) = val_element(1,i,j,k)
-          store_val_uy_external_mesh(ipoin) = val_element(2,i,j,k)
-          store_val_uz_external_mesh(ipoin) = val_element(3,i,j,k)
-          is_done = .true.
-          return
-        endif
-      enddo
-    enddo
-  enddo
-
-  end subroutine wmo_get_val_elem
-
 !=====================================================================
 
   subroutine wmo_create_shakemap_o()
@@ -845,19 +791,19 @@
                           potential_acoustic, displ_element,&
                           hprime_xx,hprime_yy,hprime_zz, &
                           xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                          ibool,rhostore)
+                          ibool,rhostore,GRAVITY)
       ! velocity vector
       call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
                           potential_dot_acoustic, veloc_element,&
                           hprime_xx,hprime_yy,hprime_zz, &
                           xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                          ibool,rhostore)
+                          ibool,rhostore,GRAVITY)
       ! accel ?
       call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
                           potential_dot_dot_acoustic, accel_element,&
                           hprime_xx,hprime_yy,hprime_zz, &
                           xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                          ibool,rhostore)
+                          ibool,rhostore,GRAVITY)
     endif
 
     ! save all points for high resolution, or only four corners for low resolution
@@ -1089,7 +1035,7 @@
                         potential_dot_acoustic, veloc_element,&
                         hprime_xx,hprime_yy,hprime_zz, &
                         xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        ibool,rhostore)
+                        ibool,rhostore,GRAVITY)
       velocity_x(:,:,:,ispec) = veloc_element(1,:,:,:)
       velocity_y(:,:,:,ispec) = veloc_element(2,:,:,:)
       velocity_z(:,:,:,ispec) = veloc_element(3,:,:,:)

Modified: seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_seismograms.f90
===================================================================
--- seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_seismograms.f90	2011-11-16 22:35:56 UTC (rev 19211)
+++ seismo/3D/SPECFEM3D/branches/SPECFEM3D_SUNFLOWER/src/specfem3D/write_seismograms.f90	2011-11-17 01:58:40 UTC (rev 19212)
@@ -113,13 +113,13 @@
                         potential_acoustic, displ_element,&
                         hprime_xx,hprime_yy,hprime_zz, &
                         xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        ibool,rhostore)
+                        ibool,rhostore,GRAVITY)
         ! velocity vector
         call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
                         potential_dot_acoustic, veloc_element,&
                         hprime_xx,hprime_yy,hprime_zz, &
                         xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        ibool,rhostore)
+                        ibool,rhostore,GRAVITY)
 
         ! interpolates displ/veloc/pressure at receiver locations
         call compute_interpolated_dva_ac(displ_element,veloc_element,&
@@ -189,13 +189,13 @@
                         potential_acoustic, displ_element,&
                         hprime_xx,hprime_yy,hprime_zz, &
                         xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        ibool,rhostore)
+                        ibool,rhostore,GRAVITY)
         ! velocity vector
         call compute_gradient(ispec,NSPEC_AB,NGLOB_AB, &
                         potential_dot_acoustic, veloc_element,&
                         hprime_xx,hprime_yy,hprime_zz, &
                         xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        ibool,rhostore)
+                        ibool,rhostore,GRAVITY)
 
         ! interpolates displ/veloc/pressure at receiver locations
         call compute_interpolated_dva_ac(displ_element,veloc_element,&
@@ -229,13 +229,13 @@
                         b_potential_acoustic, displ_element,&
                         hprime_xx,hprime_yy,hprime_zz, &
                         xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        ibool,rhostore)
+                        ibool,rhostore,GRAVITY)
         ! backward fields: velocity vector
         call compute_gradient(ispec,NSPEC_AB,NGLOB_ADJOINT, &
                         b_potential_dot_acoustic, veloc_element,&
                         hprime_xx,hprime_yy,hprime_zz, &
                         xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        ibool,rhostore)
+                        ibool,rhostore,GRAVITY)
 
         ! backward fields: interpolates displ/veloc/pressure at receiver locations
         call compute_interpolated_dva_ac(displ_element,veloc_element,&



More information about the CIG-COMMITS mailing list