[cig-commits] r19687 - in seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src: cuda specfem3D

Mon Feb 27 11:29:48 PST 2012

Author: joseph.charles
Date: 2012-02-27 11:29:48 -0800 (Mon, 27 Feb 2012)
New Revision: 19687

Modified:
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_coupling_cuda.cu
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/transfer_fields_cuda.cu
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_coupling.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_acoustic.F90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90
Log:
adds cuda routines for coupling inner_core/outer_core, outer_core/crust_mantle and crust_mantle/ocean

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_coupling_cuda.cu
===================================================================

--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_coupling_cuda.cu	2012-02-27 16:39:04 UTC (rev 19686)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_coupling_cuda.cu	2012-02-27 19:29:48 UTC (rev 19687)
@@ -43,94 +43,146 @@
 
 /* ----------------------------------------------------------------------------------------------- */
 
-__global__ void compute_coupling_acoustic_el_kernel(realw* displ,
-                                                    realw* potential_dot_dot_acoustic,
-                                                    int num_coupling_ac_el_faces,
-                                                    int* coupling_ac_el_ispec,
-                                                    int* coupling_ac_el_ijk,
-                                                    realw* coupling_ac_el_normal,
-                                                    realw* coupling_ac_el_jacobian2Dw,
-                                                    int* ibool,
-                                                    int* ispec_is_inner,
-                                                    int phase_is_inner) {
+__global__ void compute_coupling_fluid_CMB_kernel(realw* displ_crust_mantle,
+						  realw* accel_outer_core,
+						  int* ibool_crust_mantle,
+						  int* ibelm_bottom_crust_mantle,
+						  realw* normal_top_outer_core,
+						  realw* jacobian2D_top_outer_core,
+						  realw* wgllwgll_xy,
+						  int* ibool_outer_core,
+						  int* ibelm_top_outer_core,
+						  int NSPEC2D_TOP_OC) {
 
-  int igll = threadIdx.x;
+  int i = threadIdx.x;
+  int j = threadIdx.y;    
   int iface = blockIdx.x + gridDim.x*blockIdx.y;
 
-  int i,j,k,iglob,ispec;
+  int k,k_corresp,iglob_cm,iglob_oc,ispec,ispec_selected;
   realw displ_x,displ_y,displ_z,displ_n;
   realw nx,ny,nz;
-  realw jacobianw;
+  realw weight;
 
-  if( iface < num_coupling_ac_el_faces){
+  // for surfaces elements exactly at the top of the outer core (crust mantle bottom)
+  if( iface < NSPEC2D_TOP_OC ){
 
-    // don't compute points outside NGLLSQUARE==NGLL2==25
-    // way 2: no further check needed since blocksize = 25
-    //  if(igll<NGLL2) {
+    // "-1" from index values to convert from Fortran-> C indexing
+    ispec = ibelm_top_outer_core[iface] - 1;
 
+    // only for DOFs exactly on the CMB (top of these elements)
+    k = NGLLX - 1;
+
+    // get displacement on the solid side using pointwise matching
+    ispec_selected = ibelm_bottom_crust_mantle[iface] - 1;
+	
+    // get global point number 
+    // corresponding points are located at the bottom of the mantle
+    k_corresp = 0;
+    iglob_cm = ibool_crust_mantle[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k_corresp,ispec_selected)] - 1;
+
+    // elastic displacement on global point
+    displ_x = displ_crust_mantle[iglob_cm*3]; // (1,iglob)
+    displ_y = displ_crust_mantle[iglob_cm*3+1]; // (2,iglob)
+    displ_z = displ_crust_mantle[iglob_cm*3+2]; // (3,iglob)
+
+    // get normal on the CMB
+    nx = normal_top_outer_core[INDEX4(NDIM,NGLLX,NGLLX,0,i,j,iface)]; // (1,i,j,iface)
+    ny = normal_top_outer_core[INDEX4(NDIM,NGLLX,NGLLX,1,i,j,iface)]; // (2,i,j,iface)
+    nz = normal_top_outer_core[INDEX4(NDIM,NGLLX,NGLLX,2,i,j,iface)]; // (3,i,j,iface)
+
+    // calculates displacement component along normal
+    // (normal points outwards of acoustic element)
+    displ_n = displ_x*nx + displ_y*ny + displ_z*nz;
+
+    // formulation with generalized potential: gets associated, weighted jacobian
+    weight = jacobian2D_top_outer_core[INDEX3(NGLLX,NGLLX,i,j,iface)]*wgllwgll_xy[INDEX2(NGLLX,i,j)];
+
+    // get global point number
+    iglob_oc = ibool_outer_core[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)] - 1;
+
+    // update fluid acceleration/pressure
+    atomicAdd(&accel_outer_core[iglob_oc],+weight*displ_n);
+    
+  // }
+  }
+}
+
+__global__ void compute_coupling_fluid_ICB_kernel(realw* displ_inner_core,
+						  realw* accel_outer_core,
+						  int* ibool_inner_core,
+						  int* ibelm_top_inner_core,
+						  realw* normal_bottom_outer_core,
+						  realw* jacobian2D_bottom_outer_core,
+						  realw* wgllwgll_xy,
+						  int* ibool_outer_core,
+						  int* ibelm_bottom_outer_core,
+						  int NSPEC2D_BOTTOM_OC) {
+
+  int i = threadIdx.x;
+  int j = threadIdx.y;    
+  int iface = blockIdx.x + gridDim.x*blockIdx.y;
+
+  int k,k_corresp,iglob_ic,iglob_oc,ispec,ispec_selected;
+  realw displ_x,displ_y,displ_z,displ_n;
+  realw nx,ny,nz;
+  realw weight;
+
+  // for surfaces elements exactly at the bottom of the outer core (inner core top)
+  if( iface < NSPEC2D_BOTTOM_OC ){
+
     // "-1" from index values to convert from Fortran-> C indexing
-    ispec = coupling_ac_el_ispec[iface] - 1;
+    ispec = ibelm_bottom_outer_core[iface] - 1;
 
-    if(ispec_is_inner[ispec] == phase_is_inner ) {
+    // only for DOFs exactly on the ICB (bottom of these elements)
+    k = 0;
 
-      i = coupling_ac_el_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)] - 1;
-      j = coupling_ac_el_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)] - 1;
-      k = coupling_ac_el_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)] - 1;
-      iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)] - 1;
+    // get displacement on the solid side using pointwise matching
+    ispec_selected = ibelm_top_inner_core[iface] - 1;
+	
+    // get global point number 			       
+    // corresponding points are located at the bottom of the mantle
+    k_corresp = NGLLX - 1;
+    iglob_ic = ibool_inner_core[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k_corresp,ispec_selected)] - 1;
 
-      // elastic displacement on global point
-      displ_x = displ[iglob*3] ; // (1,iglob)
-      displ_y = displ[iglob*3+1] ; // (2,iglob)
-      displ_z = displ[iglob*3+2] ; // (3,iglob)
+    // elastic displacement on global point
+    displ_x = displ_inner_core[iglob_ic*3]; // (1,iglob)
+    displ_y = displ_inner_core[iglob_ic*3+1]; // (2,iglob)
+    displ_z = displ_inner_core[iglob_ic*3+2]; // (3,iglob)
 
-      // gets associated normal on GLL point
-      nx = coupling_ac_el_normal[INDEX3(NDIM,NGLL2,0,igll,iface)]; // (1,igll,iface)
-      ny = coupling_ac_el_normal[INDEX3(NDIM,NGLL2,1,igll,iface)]; // (2,igll,iface)
-      nz = coupling_ac_el_normal[INDEX3(NDIM,NGLL2,2,igll,iface)]; // (3,igll,iface)
+    // get normal on the ICB
+    nx = normal_bottom_outer_core[INDEX4(NDIM,NGLLX,NGLLX,0,i,j,iface)]; // (1,i,j,iface)
+    ny = normal_bottom_outer_core[INDEX4(NDIM,NGLLX,NGLLX,1,i,j,iface)]; // (2,i,j,iface)
+    nz = normal_bottom_outer_core[INDEX4(NDIM,NGLLX,NGLLX,2,i,j,iface)]; // (3,i,j,iface)
 
-      // calculates displacement component along normal
-      // (normal points outwards of acoustic element)
-      displ_n = displ_x*nx + displ_y*ny + displ_z*nz;
+    // calculates displacement component along normal
+    // (normal points outwards of acoustic element)
+    displ_n = displ_x*nx + displ_y*ny + displ_z*nz;
 
-      // gets associated, weighted jacobian
-      jacobianw = coupling_ac_el_jacobian2Dw[INDEX2(NGLL2,igll,iface)];
+    // formulation with generalized potential: gets associated, weighted jacobian
+    weight = jacobian2D_bottom_outer_core[INDEX3(NGLLX,NGLLX,i,j,iface)]*wgllwgll_xy[INDEX2(NGLLX,i,j)];
 
-      // continuity of pressure and normal displacement on global point
+    // get global point number
+    iglob_oc = ibool_outer_core[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)] - 1;
 
-      // note: Newmark time scheme together with definition of scalar potential:
-      //          pressure = - chi_dot_dot
-      //          requires that this coupling term uses the updated displacement at time step [t+delta_t],
-      //          which is done at the very beginning of the time loop
-      //          (see e.g. Chaljub & Vilotte, Nissen-Meyer thesis...)
-      //          it also means you have to calculate and update this here first before
-      //          calculating the coupling on the elastic side for the acceleration...
-      atomicAdd(&potential_dot_dot_acoustic[iglob],+ jacobianw*displ_n);
+    // update fluid acceleration/pressure
+    atomicAdd(&accel_outer_core[iglob_oc],+ weight*displ_n);
 
-    }
-  //  }
+  // }
   }
 }
 
 /* ----------------------------------------------------------------------------------------------- */
 
 extern "C"
-void FC_FUNC_(compute_coupling_ac_el_cuda,
-              COMPUTE_COUPLING_AC_EL_CUDA)(
-                                            long* Mesh_pointer_f,
-                                            int* phase_is_innerf,
-                                            int* num_coupling_ac_el_facesf,
-                                            int* SIMULATION_TYPEf) {
-  TRACE("compute_coupling_ac_el_cuda");
+void FC_FUNC_(compute_coupling_fluid_cmb_cuda,
+              COMPUTE_COUPLING_FLUID_CMB_CUDA)(long* Mesh_pointer_f) {
+
+  TRACE("compute_coupling_fluid_cmb_cuda");
   //double start_time = get_time();
 
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-  int phase_is_inner            = *phase_is_innerf;
-  int num_coupling_ac_el_faces  = *num_coupling_ac_el_facesf;
-  int SIMULATION_TYPE           = *SIMULATION_TYPEf;
 
-  // way 1: exact blocksize to match NGLLSQUARE
-  int blocksize = NGLL2;
-  int num_blocks_x = num_coupling_ac_el_faces;
+  int num_blocks_x = mp->nspec2D_top_outer_core;
   int num_blocks_y = 1;
   while(num_blocks_x > 65535) {
     num_blocks_x = (int) ceil(num_blocks_x*0.5f);
@@ -138,138 +190,234 @@
   }
 
   dim3 grid(num_blocks_x,num_blocks_y);
-  dim3 threads(blocksize,1,1);
+  dim3 threads(5,5,1);
 
   // launches GPU kernel
-  compute_coupling_acoustic_el_kernel<<<grid,threads>>>(mp->d_displ,
-                                                       mp->d_potential_dot_dot_acoustic,
-                                                       num_coupling_ac_el_faces,
-                                                       mp->d_coupling_ac_el_ispec,
-                                                       mp->d_coupling_ac_el_ijk,
-                                                       mp->d_coupling_ac_el_normal,
-                                                       mp->d_coupling_ac_el_jacobian2Dw,
-                                                       mp->d_ibool,
-                                                       mp->d_ispec_is_inner,
-                                                       phase_is_inner);
+  compute_coupling_fluid_CMB_kernel<<<grid,threads>>>(mp->d_displ_crust_mantle,
+						      mp->d_accel_outer_core,
+						      mp->d_ibool_crust_mantle,
+						      mp->d_ibelm_bottom_crust_mantle,
+						      mp->d_normal_top_outer_core,
+						      mp->d_jacobian2D_top_outer_core,
+						      mp->d_wgllwgll_xy,
+						      mp->d_ibool_outer_core,
+						      mp->d_ibelm_top_outer_core,
+						      mp->nspec2D_top_outer_core);
 
-  //  adjoint simulations
-  if (SIMULATION_TYPE == 3 ){
-    compute_coupling_acoustic_el_kernel<<<grid,threads>>>(mp->d_b_displ,
-                                                          mp->d_b_potential_dot_dot_acoustic,
-                                                          num_coupling_ac_el_faces,
-                                                          mp->d_coupling_ac_el_ispec,
-                                                          mp->d_coupling_ac_el_ijk,
-                                                          mp->d_coupling_ac_el_normal,
-                                                          mp->d_coupling_ac_el_jacobian2Dw,
-                                                          mp->d_ibool,
-                                                          mp->d_ispec_is_inner,
-                                                          phase_is_inner);
+  // adjoint simulations
+  if ( mp->simulation_type == 3 ){
+    compute_coupling_fluid_CMB_kernel<<<grid,threads>>>(mp->d_b_displ_crust_mantle,
+							mp->d_b_accel_outer_core,
+							mp->d_ibool_crust_mantle,
+							mp->d_ibelm_bottom_crust_mantle,
+							mp->d_normal_top_outer_core,
+							mp->d_jacobian2D_top_outer_core,
+							mp->d_wgllwgll_xy,
+							mp->d_ibool_outer_core,
+							mp->d_ibelm_top_outer_core,
+							mp->nspec2D_top_outer_core);
+  }
 
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  //double end_time = get_time();
+  //printf("Elapsed time: %e\n",end_time-start_time);
+  exit_on_cuda_error("compute_coupling_fluid_CMB_kernel");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(compute_coupling_fluid_icb_cuda,
+              COMPUTE_COUPLING_FLUID_ICB_CUDA)(long* Mesh_pointer_f) {
+
+  TRACE("compute_coupling_fluid_icb_cuda");
+  //double start_time = get_time();
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+  int num_blocks_x = mp->nspec2D_bottom_outer_core;
+  int num_blocks_y = 1;
+  while(num_blocks_x > 65535) {
+    num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+    num_blocks_y = num_blocks_y*2;
   }
 
+  dim3 grid(num_blocks_x,num_blocks_y);
+  dim3 threads(5,5,1);
+
+  // launches GPU kernel
+  compute_coupling_fluid_ICB_kernel<<<grid,threads>>>(mp->d_displ_inner_core,
+						      mp->d_accel_outer_core,
+						      mp->d_ibool_inner_core,
+						      mp->d_ibelm_top_inner_core,
+						      mp->d_normal_bottom_outer_core,
+						      mp->d_jacobian2D_bottom_outer_core,
+						      mp->d_wgllwgll_xy,
+						      mp->d_ibool_outer_core,
+						      mp->d_ibelm_bottom_outer_core,
+						      mp->nspec2D_bottom_outer_core);
+
+  // adjoint simulations
+  if ( mp->simulation_type == 3 ){
+    compute_coupling_fluid_ICB_kernel<<<grid,threads>>>(mp->d_b_displ_inner_core,
+							mp->d_b_accel_outer_core,
+							mp->d_ibool_inner_core,
+							mp->d_ibelm_top_inner_core,
+							mp->d_normal_bottom_outer_core,
+							mp->d_jacobian2D_bottom_outer_core,
+							mp->d_wgllwgll_xy,
+							mp->d_ibool_outer_core,
+							mp->d_ibelm_bottom_outer_core,
+							mp->nspec2D_bottom_outer_core);
+  }
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   //double end_time = get_time();
   //printf("Elapsed time: %e\n",end_time-start_time);
-  exit_on_cuda_error("compute_coupling_acoustic_el_kernel");
+  exit_on_cuda_error("compute_coupling_fluid_ICB_kernel");
 #endif
 }
 
-
 /* ----------------------------------------------------------------------------------------------- */
 
 // ELASTIC - ACOUSTIC coupling
 
 /* ----------------------------------------------------------------------------------------------- */
 
-__global__ void compute_coupling_elastic_ac_kernel(realw* potential_dot_dot_acoustic,
-                                                    realw* accel,
-                                                    int num_coupling_ac_el_faces,
-                                                    int* coupling_ac_el_ispec,
-                                                    int* coupling_ac_el_ijk,
-                                                    realw* coupling_ac_el_normal,
-                                                    realw* coupling_ac_el_jacobian2Dw,
-                                                    int* ibool,
-                                                    int* ispec_is_inner,
-                                                    int phase_is_inner,
-                                                    int gravity,
-                                                    realw* minus_g,
-                                                    realw* rhostore,
-                                                    realw* displ) {
+__global__ void compute_coupling_CMB_fluid_kernel(realw* displ_crust_mantle,
+						  realw* accel_crust_mantle,
+						  realw* accel_outer_core,
+						  int* ibool_crust_mantle,
+						  int* ibelm_bottom_crust_mantle,
+						  realw* normal_top_outer_core,
+						  realw* jacobian2D_top_outer_core,
+						  realw* wgllwgll_xy,
+						  int* ibool_outer_core,
+						  int* ibelm_top_outer_core,
+						  double RHO_TOP_OC,
+						  realw minus_g_cmb,
+						  int GRAVITY_VAL,
+						  int NSPEC_BOTTOM_CM) {
 
-  int igll = threadIdx.x;
+  int i = threadIdx.x;
+  int j = threadIdx.y;    
   int iface = blockIdx.x + gridDim.x*blockIdx.y;
 
-  int i,j,k,iglob,ispec;
+  int k,k_corresp,iglob_cm,iglob_oc,ispec,ispec_selected;
   realw pressure;
   realw nx,ny,nz;
-  realw jacobianw;
-  realw rhol;
+  realw weight;
 
-  if( iface < num_coupling_ac_el_faces){
+  // for surfaces elements exactly at the bottom of the crust mantle (outer core top)
+  if( iface < NSPEC_BOTTOM_CM ){
 
-    // don't compute points outside NGLLSQUARE==NGLL2==25
-    // way 2: no further check needed since blocksize = 25
-    //  if(igll<NGLL2) {
-
     // "-1" from index values to convert from Fortran-> C indexing
-    ispec = coupling_ac_el_ispec[iface] - 1;
+    ispec = ibelm_bottom_crust_mantle[iface] - 1;
 
-    if(ispec_is_inner[ispec] == phase_is_inner ) {
+    // only for DOFs exactly on the CMB (bottom of these elements)
+    k = 0;
 
-      i = coupling_ac_el_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)] - 1;
-      j = coupling_ac_el_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)] - 1;
-      k = coupling_ac_el_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)] - 1;
-      iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)] - 1;
+    // get velocity potential on the fluid side using pointwise matching
+    ispec_selected = ibelm_top_outer_core[iface] - 1;
 
-      // gets associated normal on GLL point
-      // note: normal points away from acoustic element
-      nx = coupling_ac_el_normal[INDEX3(NDIM,NGLL2,0,igll,iface)]; // (1,igll,iface)
-      ny = coupling_ac_el_normal[INDEX3(NDIM,NGLL2,1,igll,iface)]; // (2,igll,iface)
-      nz = coupling_ac_el_normal[INDEX3(NDIM,NGLL2,2,igll,iface)]; // (3,igll,iface)
+    // get normal on the CMB
+    nx = normal_top_outer_core[INDEX4(NDIM,NGLLX,NGLLX,0,i,j,iface)]; // (1,i,j,iface)
+    ny = normal_top_outer_core[INDEX4(NDIM,NGLLX,NGLLX,1,i,j,iface)]; // (2,i,j,iface)
+    nz = normal_top_outer_core[INDEX4(NDIM,NGLLX,NGLLX,2,i,j,iface)]; // (3,i,j,iface)
 
-      // gets associated, weighted jacobian
-      jacobianw = coupling_ac_el_jacobian2Dw[INDEX2(NGLL2,igll,iface)];
+    // get global point number 
+    // corresponding points are located at the top of the outer core
+    k_corresp = NGLLX - 1;
+    iglob_oc = ibool_outer_core[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k_corresp,ispec_selected)] - 1;
+    iglob_cm = ibool_crust_mantle[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)] - 1;
 
-      // acoustic pressure on global point
-      if( gravity ){
-        // takes density (from acoustic? element)
-        rhol = rhostore[INDEX4_PADDED(NGLLX,NGLLX,NGLLX,i,j,k,ispec)];
+    // compute pressure, taking gravity into account
+    if( GRAVITY_VAL ){
+      pressure = RHO_TOP_OC * ( - accel_outer_core[iglob_oc] + minus_g_cmb 
+				 * (displ_crust_mantle[iglob_cm*3]*nx 
+				    + displ_crust_mantle[iglob_cm*3+1]*ny
+				    + displ_crust_mantle[iglob_cm*3+2]*nz) );
+    }else{
+      pressure = - RHO_TOP_OC * accel_outer_core[iglob_oc];
+    }
+      
+    // formulation with generalized potential: gets associated, weighted jacobian
+    weight = jacobian2D_top_outer_core[INDEX3(NGLLX,NGLLX,i,j,iface)]*wgllwgll_xy[INDEX2(NGLLX,i,j)];
 
-        // note: uses potential chi such that displacement s = grad(chi),
-        //         pressure becomes: p = - kappa ( div( s ) ) = rho ( - dot_dot_chi + g * s )
-        //  g only acting in negative z-direction
+    // update fluid acceleration/pressure
+    atomicAdd(&accel_crust_mantle[iglob_cm*3],+ weight*nx*pressure);
+    atomicAdd(&accel_crust_mantle[iglob_cm*3+1],+ weight*ny*pressure);
+    atomicAdd(&accel_crust_mantle[iglob_cm*3+2],+ weight*nz*pressure);
 
-        // daniel: TODO - check gravity and coupling would be displ * nz  correct?
-        pressure = rhol*( - potential_dot_dot_acoustic[iglob]
-                         + minus_g[iglob] * displ[iglob*3+2] );
+    //  }
+  }
+}
 
-        //daniel: TODO - check gravity and coupling
-        //pressure = - potential_dot_dot_acoustic[iglob] ;
-        //if( iface == 128 && igll == 5 ){
-        //  printf("coupling acoustic: %f %f \n",potential_dot_dot_acoustic[iglob],
-        //             minus_g[iglob] * displ[iglob*3+2]);
-        //}
+__global__ void compute_coupling_ICB_fluid_kernel(realw* displ_inner_core,
+						  realw* accel_inner_core,
+						  realw* accel_outer_core,
+						  int* ibool_inner_core,
+						  int* ibelm_top_inner_core,
+						  realw* normal_bottom_outer_core,
+						  realw* jacobian2D_bottom_outer_core,
+						  realw* wgllwgll_xy,
+						  int* ibool_outer_core,
+						  int* ibelm_bottom_outer_core,
+						  double RHO_BOTTOM_OC,
+						  realw minus_g_icb,
+						  int GRAVITY_VAL,
+						  int NSPEC_TOP_IC) {
 
-      }else{
-        // no gravity: uses potential chi such that displacement s = 1/rho grad(chi)
-        //                  pressure p = - kappa ( div( s ) ) then becomes: p = - dot_dot_chi
-        //                  ( multiplied with factor 1/kappa due to setup of equation of motion )
-        pressure = - potential_dot_dot_acoustic[iglob];
-      }
+  int i = threadIdx.x;
+  int j = threadIdx.y;    
+  int iface = blockIdx.x + gridDim.x*blockIdx.y;
 
-      // continuity of displacement and pressure on global point
-      //
-      // note: Newmark time scheme together with definition of scalar potential:
-      //          pressure = - chi_dot_dot
-      //          requires that this coupling term uses the *UPDATED* pressure (chi_dot_dot), i.e.
-      //          pressure at time step [t + delta_t]
-      //          (see e.g. Chaljub & Vilotte, Nissen-Meyer thesis...)
-      //          it means you have to calculate and update the acoustic pressure first before
-      //          calculating this term...
-      atomicAdd(&accel[iglob*3],+ jacobianw*nx*pressure);
-      atomicAdd(&accel[iglob*3+1],+ jacobianw*ny*pressure);
-      atomicAdd(&accel[iglob*3+2],+ jacobianw*nz*pressure);
+  int k,k_corresp,iglob_ic,iglob_oc,ispec,ispec_selected;
+  realw pressure;
+  realw nx,ny,nz;
+  realw weight;
+
+  // for surfaces elements exactly at the top of the inner core (outer core bottom)
+  if( iface < NSPEC_TOP_IC ){
+
+    // "-1" from index values to convert from Fortran-> C indexing
+    ispec = ibelm_top_inner_core[iface] - 1;
+
+    // only for DOFs exactly on the ICB (top of these elements)
+    k = NGLLX - 1;
+
+    // get velocity potential on the fluid side using pointwise matching
+    ispec_selected = ibelm_bottom_outer_core[iface] - 1;
+
+    // get normal on the ICB
+    nx = normal_bottom_outer_core[INDEX4(NDIM,NGLLX,NGLLX,0,i,j,iface)]; // (1,i,j,iface)
+    ny = normal_bottom_outer_core[INDEX4(NDIM,NGLLX,NGLLX,1,i,j,iface)]; // (2,i,j,iface)
+    nz = normal_bottom_outer_core[INDEX4(NDIM,NGLLX,NGLLX,2,i,j,iface)]; // (3,i,j,iface)
+
+    // get global point number 
+    // corresponding points are located at the bottom of the outer core
+    k_corresp = 0;
+    iglob_oc = ibool_outer_core[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k_corresp,ispec_selected)] - 1;
+    iglob_ic = ibool_inner_core[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)] - 1;
+
+    // compute pressure, taking gravity into account
+    if( GRAVITY_VAL ){
+      pressure = RHO_BOTTOM_OC * ( - accel_outer_core[iglob_oc] + minus_g_icb
+				 * (displ_inner_core[iglob_ic*3]*nx 
+				    + displ_inner_core[iglob_ic*3+1]*ny
+				    + displ_inner_core[iglob_ic*3+2]*nz) );
+    }else{
+      pressure = - RHO_BOTTOM_OC * accel_outer_core[iglob_oc];
     }
+      
+    // formulation with generalized potential: gets associated, weighted jacobian
+    weight = jacobian2D_bottom_outer_core[INDEX3(NGLLX,NGLLX,i,j,iface)]*wgllwgll_xy[INDEX2(NGLLX,i,j)];
+
+    // update fluid acceleration/pressure
+    atomicAdd(&accel_inner_core[iglob_ic*3],+ weight*nx*pressure);
+    atomicAdd(&accel_inner_core[iglob_ic*3+1],+ weight*ny*pressure);
+    atomicAdd(&accel_inner_core[iglob_ic*3+2],+ weight*nz*pressure);
+
     //  }
   }
 }
@@ -277,24 +425,18 @@
 /* ----------------------------------------------------------------------------------------------- */
 
 extern "C"
-void FC_FUNC_(compute_coupling_el_ac_cuda,
-              COMPUTE_COUPLING_EL_AC_CUDA)(
-                                                 long* Mesh_pointer_f,
-                                                 int* phase_is_innerf,
-                                                 int* num_coupling_ac_el_facesf,
-                                                 int* SIMULATION_TYPEf) {
-  TRACE("compute_coupling_el_ac_cuda");
+void FC_FUNC_(compute_coupling_cmb_fluid_cuda,
+              COMPUTE_COUPLING_CMB_FLUID_CUDA)(long* Mesh_pointer_f,
+					       double RHO_TOP_OC,
+					       realw minus_g_cmb,
+					       int GRAVITY_VAL) {
+
+  TRACE("compute_coupling_cmb_fluid_cuda");
   //double start_time = get_time();
 
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-  int phase_is_inner            = *phase_is_innerf;
-  int num_coupling_ac_el_faces  = *num_coupling_ac_el_facesf;
-  int SIMULATION_TYPE           = *SIMULATION_TYPEf;
 
-  // way 1: exact blocksize to match NGLLSQUARE
-  int blocksize = 25;
-
-  int num_blocks_x = num_coupling_ac_el_faces;
+  int num_blocks_x = mp->nspec2D_bottom_crust_mantle;
   int num_blocks_y = 1;
   while(num_blocks_x > 65535) {
     num_blocks_x = (int) ceil(num_blocks_x*0.5f);
@@ -302,51 +444,114 @@
   }
 
   dim3 grid(num_blocks_x,num_blocks_y);
-  dim3 threads(blocksize,1,1);
+  dim3 threads(5,5,1);
 
   // launches GPU kernel
-  compute_coupling_elastic_ac_kernel<<<grid,threads>>>(mp->d_potential_dot_dot_acoustic,
-                                                       mp->d_accel,
-                                                       num_coupling_ac_el_faces,
-                                                       mp->d_coupling_ac_el_ispec,
-                                                       mp->d_coupling_ac_el_ijk,
-                                                       mp->d_coupling_ac_el_normal,
-                                                       mp->d_coupling_ac_el_jacobian2Dw,
-                                                       mp->d_ibool,
-                                                       mp->d_ispec_is_inner,
-                                                       phase_is_inner,
-                                                       mp->gravity,
-                                                       mp->d_minus_g,
-                                                       mp->d_rhostore,
-                                                       mp->d_displ);
+  compute_coupling_CMB_fluid_kernel<<<grid,threads>>>(mp->d_displ_crust_mantle,
+						      mp->d_accel_crust_mantle,
+						      mp->d_accel_outer_core,
+						      mp->d_ibool_crust_mantle,
+						      mp->d_ibelm_bottom_crust_mantle,
+						      mp->d_normal_top_outer_core,
+						      mp->d_jacobian2D_top_outer_core,
+						      mp->d_wgllwgll_xy,
+						      mp->d_ibool_outer_core,
+						      mp->d_ibelm_top_outer_core,
+						      RHO_TOP_OC,
+						      minus_g_cmb,
+						      GRAVITY_VAL,
+						      mp->nspec2D_bottom_crust_mantle);
 
   //  adjoint simulations
-  if (SIMULATION_TYPE == 3 ){
-    compute_coupling_elastic_ac_kernel<<<grid,threads>>>(mp->d_b_potential_dot_dot_acoustic,
-                                                         mp->d_b_accel,
-                                                         num_coupling_ac_el_faces,
-                                                         mp->d_coupling_ac_el_ispec,
-                                                         mp->d_coupling_ac_el_ijk,
-                                                         mp->d_coupling_ac_el_normal,
-                                                         mp->d_coupling_ac_el_jacobian2Dw,
-                                                         mp->d_ibool,
-                                                         mp->d_ispec_is_inner,
-                                                         phase_is_inner,
-                                                         mp->gravity,
-                                                         mp->d_minus_g,
-                                                         mp->d_rhostore,
-                                                         mp->d_b_displ);
+  if ( mp->simulation_type == 3 ){
+    compute_coupling_CMB_fluid_kernel<<<grid,threads>>>(mp->d_b_displ_crust_mantle,
+							mp->d_b_accel_crust_mantle,
+							mp->d_b_accel_outer_core,
+							mp->d_ibool_crust_mantle,
+							mp->d_ibelm_bottom_crust_mantle,
+							mp->d_normal_top_outer_core,
+							mp->d_jacobian2D_top_outer_core,
+							mp->d_wgllwgll_xy,
+							mp->d_ibool_outer_core,
+							mp->d_ibelm_top_outer_core,
+							RHO_TOP_OC,
+							minus_g_cmb,
+							GRAVITY_VAL,
+							mp->nspec2D_bottom_crust_mantle);
+  }
 
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+  //double end_time = get_time();
+  //printf("Elapsed time: %e\n",end_time-start_time);
+  exit_on_cuda_error("compute_coupling_CMB_fluid_cuda");
+#endif
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(compute_coupling_icb_fluid_cuda,
+              COMPUTE_COUPLING_ICB_FLUID_CUDA)(long* Mesh_pointer_f,
+					       double RHO_BOTTOM_OC,
+					       realw minus_g_icb,
+					       int GRAVITY_VAL) {
+
+  TRACE("compute_coupling_icb_fluid_cuda");
+  //double start_time = get_time();
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+  int num_blocks_x = mp->nspec2D_top_inner_core;
+  int num_blocks_y = 1;
+  while(num_blocks_x > 65535) {
+    num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+    num_blocks_y = num_blocks_y*2;
   }
 
+  dim3 grid(num_blocks_x,num_blocks_y);
+  dim3 threads(5,5,1);
+
+  // launches GPU kernel
+  compute_coupling_ICB_fluid_kernel<<<grid,threads>>>(mp->d_displ_inner_core,
+						      mp->d_accel_inner_core,
+						      mp->d_accel_outer_core,
+						      mp->d_ibool_inner_core,
+						      mp->d_ibelm_top_inner_core,
+						      mp->d_normal_bottom_outer_core,
+						      mp->d_jacobian2D_bottom_outer_core,
+						      mp->d_wgllwgll_xy,
+						      mp->d_ibool_outer_core,
+						      mp->d_ibelm_bottom_outer_core,
+						      RHO_BOTTOM_OC,
+						      minus_g_icb,
+						      GRAVITY_VAL,
+						      mp->nspec2D_top_inner_core);
+
+  //  adjoint simulations
+  if ( mp->simulation_type == 3 ){
+    compute_coupling_ICB_fluid_kernel<<<grid,threads>>>(mp->d_b_displ_inner_core,
+							mp->d_b_accel_inner_core,
+							mp->d_b_accel_outer_core,
+							mp->d_ibool_inner_core,
+							mp->d_ibelm_top_inner_core,
+							mp->d_normal_bottom_outer_core,
+							mp->d_jacobian2D_bottom_outer_core,
+							mp->d_wgllwgll_xy,
+							mp->d_ibool_outer_core,
+							mp->d_ibelm_bottom_outer_core,
+							RHO_BOTTOM_OC,
+							minus_g_icb,
+							GRAVITY_VAL,
+							mp->nspec2D_top_inner_core);
+  }
+
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   //double end_time = get_time();
   //printf("Elapsed time: %e\n",end_time-start_time);
-  exit_on_cuda_error("compute_coupling_el_ac_cuda");
+  exit_on_cuda_error("compute_coupling_ICB_fluid_cuda");
 #endif
 }
 
-
 /* ----------------------------------------------------------------------------------------------- */
 
 /* OCEANS load coupled on free surface */
@@ -354,59 +559,61 @@
 /* ----------------------------------------------------------------------------------------------- */
 
 
-__global__ void compute_coupling_ocean_cuda_kernel(realw* accel,
-                                               realw* rmass,
-                                               realw* rmass_ocean_load,
-                                               int num_free_surface_faces,
-                                               int* free_surface_ispec,
-                                               int* free_surface_ijk,
-                                               realw* free_surface_normal,
-                                               int* ibool,
-                                               int* updated_dof_ocean_load) {
-  // gets spectral element face id
-  int igll = threadIdx.x ;  //  threadIdx.y*blockDim.x will be always = 0 for thread block (25,1,1)
+__global__ void compute_coupling_ocean_cuda_kernel(realw* accel_crust_mantle,
+						   realw* rmass_crust_mantle,
+						   realw* rmass_ocean_load,
+						   realw* normal_top_crust_mantle,
+						   int* ibool_crust_mantle,
+						   int* ibelm_top_crust_mantle,
+						   int* updated_dof_ocean_load,
+						   int NSPEC2D_TOP_CM) {
+
+  int i = threadIdx.x;
+  int j = threadIdx.y;
   int iface = blockIdx.x + gridDim.x*blockIdx.y;
+
+  int k,iglob,ispec;
   realw nx,ny,nz;
   realw force_normal_comp,additional_term;
 
-  // for all faces on free surface
-  if( iface < num_free_surface_faces ){
+  // for surfaces elements exactly at the top of the crust mantle (ocean bottom)
+  if( iface < NSPEC2D_TOP_CM ){
 
-    int ispec = free_surface_ispec[iface]-1;
+    // "-1" from index values to convert from Fortran-> C indexing
+    ispec = ibelm_top_crust_mantle[iface] - 1;
 
-    // gets global point index
-    int i = free_surface_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)] - 1; // (1,igll,iface)
-    int j = free_surface_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)] - 1;
-    int k = free_surface_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)] - 1;
+    // only for DOFs exactly on the CMB (top of these elements)
+    k = NGLLX - 1;
 
-    int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)] - 1;
+    // get global point number
+    iglob = ibool_crust_mantle[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)] - 1;
 
-    //if(igll == 0 ) printf("igll %d %d %d %d\n",igll,i,j,k,iglob);
-
     // only update this global point once
 
     // daniel: TODO - there might be better ways to implement a mutex like below,
     //            and find a workaround to not use the temporary update array.
-    //            atomicExch: returns the old value, i.e. 0 indicates that we still have to do this point
+    //            As a reminder, atomicExch atomically sets the value of the memory 
+    //            location stored in address to val and returns the old value.
+    //            0 indicates that we still have to do this point
 
     if( atomicExch(&updated_dof_ocean_load[iglob],1) == 0){
 
-      // get normal
-      nx = free_surface_normal[INDEX3(NDIM,NGLL2,0,igll,iface)]; //(1,igll,iface)
-      ny = free_surface_normal[INDEX3(NDIM,NGLL2,1,igll,iface)];
-      nz = free_surface_normal[INDEX3(NDIM,NGLL2,2,igll,iface)];
+      // get normal on the CMB 
+      nx = normal_top_crust_mantle[INDEX4(NDIM,NGLLX,NGLLX,0,i,j,iface)]; // (1,i,j,iface)
+      ny = normal_top_crust_mantle[INDEX4(NDIM,NGLLX,NGLLX,1,i,j,iface)]; // (2,i,j,iface)
+      nz = normal_top_crust_mantle[INDEX4(NDIM,NGLLX,NGLLX,2,i,j,iface)]; // (3,i,j,iface)
 
       // make updated component of right-hand side
       // we divide by rmass() which is 1 / M
       // we use the total force which includes the Coriolis term above
-      force_normal_comp = ( accel[iglob*3]*nx + accel[iglob*3+1]*ny + accel[iglob*3+2]*nz ) / rmass[iglob];
+      force_normal_comp = ( accel_crust_mantle[iglob*3]*nx + accel_crust_mantle[iglob*3+1]*ny + accel_crust_mantle[iglob*3+2]*nz ) / rmass_crust_mantle[iglob];
 
-      additional_term = (rmass_ocean_load[iglob] - rmass[iglob]) * force_normal_comp;
+      additional_term = (rmass_ocean_load[iglob] - rmass_crust_mantle[iglob]) * force_normal_comp;
 
       // probably wouldn't need atomicAdd anymore, but just to be sure...
-      atomicAdd(&accel[iglob*3], + additional_term * nx);
-      atomicAdd(&accel[iglob*3+1], + additional_term * ny);
-      atomicAdd(&accel[iglob*3+2], + additional_term * nz);
+      atomicAdd(&accel_crust_mantle[iglob*3], + additional_term * nx);
+      atomicAdd(&accel_crust_mantle[iglob*3+1], + additional_term * ny);
+      atomicAdd(&accel_crust_mantle[iglob*3+2], + additional_term * nz);
     }
   }
 }
@@ -415,20 +622,13 @@
 
 extern "C"
 void FC_FUNC_(compute_coupling_ocean_cuda,
-              COMPUTE_COUPLING_OCEAN_CUDA)(long* Mesh_pointer_f,
-                                       int* SIMULATION_TYPE) {
+              COMPUTE_COUPLING_OCEAN_CUDA)(long* Mesh_pointer_f) {
 
   TRACE("compute_coupling_ocean_cuda");
 
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
 
-  // checks if anything to do
-  if( mp->num_free_surface_faces == 0 ) return;
-
-  // block sizes: exact blocksize to match NGLLSQUARE
-  int blocksize = NGLL2;
-
-  int num_blocks_x = mp->num_free_surface_faces;
+  int num_blocks_x = mp->nspec2D_top_crust_mantle;
   int num_blocks_y = 1;
   while(num_blocks_x > 65535) {
     num_blocks_x = (int) ceil(num_blocks_x*0.5f);
@@ -436,7 +636,7 @@
   }
 
   dim3 grid(num_blocks_x,num_blocks_y);
-  dim3 threads(blocksize,1,1);
+  dim3 threads(5,5,1);
 
 
   // initializes temporary array to zero
@@ -447,36 +647,32 @@
   exit_on_cuda_error("before kernel compute_coupling_ocean_cuda");
 #endif
 
-  compute_coupling_ocean_cuda_kernel<<<grid,threads>>>(mp->d_accel,
-                                                       mp->d_rmass,
+  compute_coupling_ocean_cuda_kernel<<<grid,threads>>>(mp->d_accel_crust_mantle,
+                                                       mp->d_rmass_crust_mantle,
                                                        mp->d_rmass_ocean_load,
-                                                       mp->num_free_surface_faces,
-                                                       mp->d_free_surface_ispec,
-                                                       mp->d_free_surface_ijk,
-                                                       mp->d_free_surface_normal,
-                                                       mp->d_ibool,
-                                                       mp->d_updated_dof_ocean_load);
+						       mp->d_normal_top_crust_mantle,
+						       mp->d_ibool_crust_mantle,
+						       mp->d_ibelm_top_crust_mantle,
+						       mp->d_updated_dof_ocean_load,
+						       mp->nspec2D_top_crust_mantle);
+
   // for backward/reconstructed potentials
-  if(*SIMULATION_TYPE == 3) {
+  if( mp->simulation_type == 3 ) {
     // re-initializes array
     print_CUDA_error_if_any(cudaMemset(mp->d_updated_dof_ocean_load,0,
                                        sizeof(int)*mp->NGLOB_AB),88502);
 
-    compute_coupling_ocean_cuda_kernel<<<grid,threads>>>(mp->d_b_accel,
-                                                         mp->d_rmass,
-                                                         mp->d_rmass_ocean_load,
-                                                         mp->num_free_surface_faces,
-                                                         mp->d_free_surface_ispec,
-                                                         mp->d_free_surface_ijk,
-                                                         mp->d_free_surface_normal,
-                                                         mp->d_ibool,
-                                                         mp->d_updated_dof_ocean_load);
-
+    compute_coupling_ocean_cuda_kernel<<<grid,threads>>>(mp->d_b_accel_crust_mantle,
+							 mp->d_rmass_crust_mantle,
+							 mp->d_rmass_ocean_load,
+							 mp->d_normal_top_crust_mantle,
+							 mp->d_ibool_crust_mantle,
+							 mp->d_ibelm_top_crust_mantle,
+							 mp->d_updated_dof_ocean_load,
+							 mp->nspec2D_top_crust_mantle);
   }
 
-
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   exit_on_cuda_error("compute_coupling_ocean_cuda");
 #endif
 }
-

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h	2012-02-27 16:39:04 UTC (rev 19686)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h	2012-02-27 19:29:48 UTC (rev 19687)
@@ -169,6 +169,7 @@
   // ------------------------------------------------------------------ //
   int NSPEC_CRUST_MANTLE;
   int NGLOB_CRUST_MANTLE;
+  int NGLOB_CRUST_MANTLE_OCEANS;
 
   // interpolators
   realw* d_xix_crust_mantle; realw* d_xiy_crust_mantle; realw* d_xiz_crust_mantle;
@@ -262,12 +263,19 @@
 
   int nspec_outer_crust_mantle;
   int nspec_inner_crust_mantle;
+  int nspec2D_top_crust_mantle;
+  int nspec2D_bottom_crust_mantle;
 
   int num_colors_inner_crust_mantle;
   int num_colors_outer_crust_mantle;
   int* h_num_elem_colors_crust_mantle;
 
+  int* d_ibelm_top_crust_mantle;
+  int* d_ibelm_bottom_crust_mantle;
 
+  // normal definition for coupling regions
+  realw* d_normal_top_crust_mantle;
+
   // ------------------------------------------------------------------ //
   // outer_core
   // ------------------------------------------------------------------ //
@@ -305,12 +313,24 @@
 
   int nspec_outer_outer_core;
   int nspec_inner_outer_core;
+  int nspec2D_top_outer_core;
+  int nspec2D_bottom_outer_core;
 
   int num_colors_inner_outer_core;
   int num_colors_outer_outer_core;
   int* h_num_elem_colors_outer_core;
 
+  int* d_ibelm_top_outer_core;
+  int* d_ibelm_bottom_outer_core;
 
+  // normals definitions for coupling regions
+  realw* d_normal_top_outer_core;
+  realw* d_normal_bottom_outer_core;
+
+  // jacobian definitions
+  realw* d_jacobian2D_top_outer_core;
+  realw* d_jacobian2D_bottom_outer_core;
+
   // ------------------------------------------------------------------ //
   // inner_core
   // ------------------------------------------------------------------ //
@@ -390,11 +410,14 @@
 
   int nspec_outer_inner_core;
   int nspec_inner_inner_core;
+  int nspec2D_top_inner_core;
 
   int num_colors_inner_inner_core;
   int num_colors_outer_inner_core;
   int* h_num_elem_colors_inner_core;
 
+  int* d_ibelm_top_inner_core;
+
   // ------------------------------------------------------------------ //
   // attenuation
   // ------------------------------------------------------------------ //

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu	2012-02-27 16:39:04 UTC (rev 19686)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu	2012-02-27 19:29:48 UTC (rev 19687)
@@ -195,7 +195,8 @@
                                         int* h_number_receiver_global,
                                         int* h_islice_selected_rec,int* h_ispec_selected_rec,
                                         int* nrec,int* nrec_local, int* nadj_rec_local,
-                                        int* NSPEC_CRUST_MANTLE, int* NGLOB_CRUST_MANTLE,
+                                        int* NSPEC_CRUST_MANTLE, int* NGLOB_CRUST_MANTLE, 
+					int* NGLOB_CRUST_MANTLE_OCEANS,
                                         int* NSPEC_OUTER_CORE, int* NGLOB_OUTER_CORE,
                                         int* NSPEC_INNER_CORE, int* NGLOB_INNER_CORE,
                                         int* SIMULATION_TYPE,
@@ -240,6 +241,7 @@
   // sets global parameters
   mp->NSPEC_CRUST_MANTLE = *NSPEC_CRUST_MANTLE;
   mp->NGLOB_CRUST_MANTLE = *NGLOB_CRUST_MANTLE;
+  mp->NGLOB_CRUST_MANTLE_OCEANS = *NGLOB_CRUST_MANTLE_OCEANS;
   mp->NSPEC_OUTER_CORE = *NSPEC_OUTER_CORE;
   mp->NGLOB_OUTER_CORE = *NGLOB_OUTER_CORE;
   mp->NSPEC_INNER_CORE = *NSPEC_INNER_CORE;
@@ -1335,29 +1337,34 @@
 extern "C"
 void FC_FUNC_(prepare_crust_mantle_device,
               PREPARE_CRUST_MANTLE_DEVICE)(long* Mesh_pointer_f,
-                                        realw* h_xix, realw* h_xiy, realw* h_xiz,
-                                        realw* h_etax, realw* h_etay, realw* h_etaz,
-                                        realw* h_gammax, realw* h_gammay, realw* h_gammaz,
-                                        realw* h_rho,
-                                        realw* h_kappav, realw* h_muv,
-                                        realw* h_kappah, realw* h_muh,
-                                        realw* h_eta_aniso,
-                                        realw* h_rmass,
-                                        int* h_ibool,
-                                        realw* h_xstore, realw* h_ystore, realw* h_zstore,
-                                        int* h_ispec_is_tiso,
-                                        realw *c11store,realw *c12store,realw *c13store,
-                                        realw *c14store,realw *c15store,realw *c16store,
-                                        realw *c22store,realw *c23store,realw *c24store,
-                                        realw *c25store,realw *c26store,realw *c33store,
-                                        realw *c34store,realw *c35store,realw *c36store,
-                                        realw *c44store,realw *c45store,realw *c46store,
-                                        realw *c55store,realw *c56store,realw *c66store,
-                                        int* num_phase_ispec,
-                                        int* phase_ispec_inner,
-                                        int* nspec_outer,
-                                        int* nspec_inner
-                                        ) {
+					   realw* h_xix, realw* h_xiy, realw* h_xiz,
+					   realw* h_etax, realw* h_etay, realw* h_etaz,
+					   realw* h_gammax, realw* h_gammay, realw* h_gammaz,
+					   realw* h_rho,
+					   realw* h_kappav, realw* h_muv,
+					   realw* h_kappah, realw* h_muh,
+					   realw* h_eta_aniso,
+					   realw* h_rmass,
+					   realw* h_normal_top_crust_mantle,
+					   int* h_ibelm_top_crust_mantle,
+					   int* h_ibelm_bottom_crust_mantle,
+					   int* h_ibool,
+					   realw* h_xstore, realw* h_ystore, realw* h_zstore,
+					   int* h_ispec_is_tiso,
+					   realw *c11store,realw *c12store,realw *c13store,
+					   realw *c14store,realw *c15store,realw *c16store,
+					   realw *c22store,realw *c23store,realw *c24store,
+					   realw *c25store,realw *c26store,realw *c33store,
+					   realw *c34store,realw *c35store,realw *c36store,
+					   realw *c44store,realw *c45store,realw *c46store,
+					   realw *c55store,realw *c56store,realw *c66store,
+					   int* num_phase_ispec,
+					   int* phase_ispec_inner,
+					   int* nspec_outer,
+					   int* nspec_inner,
+					   int* NSPEC2D_TOP_CM,
+					   int* NSPEC2D_BOTTOM_CM
+					   ) {
 
   TRACE("prepare_crust_mantle_device");
 
@@ -1568,6 +1575,19 @@
   mp->nspec_outer_crust_mantle = *nspec_outer;
   mp->nspec_inner_crust_mantle = *nspec_inner;
 
+  // CMB/ocean coupling
+  mp->nspec2D_top_crust_mantle = *NSPEC2D_TOP_CM;
+  mp->nspec2D_bottom_crust_mantle = *NSPEC2D_BOTTOM_CM;
+  int size_tcm = NGLL2*(mp->nspec2D_top_crust_mantle); 
+
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_normal_top_crust_mantle),sizeof(realw)*NDIM*size_tcm),40020);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_top_crust_mantle,h_normal_top_crust_mantle,sizeof(realw)*NDIM*size_tcm,cudaMemcpyHostToDevice),40030);
+
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ibelm_top_crust_mantle),sizeof(int)*(mp->nspec2D_top_crust_mantle)),40021);
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ibelm_bottom_crust_mantle),sizeof(int)*(mp->nspec2D_bottom_crust_mantle)),40021);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_top_crust_mantle,h_ibelm_top_crust_mantle,sizeof(int)*(mp->nspec2D_top_crust_mantle),cudaMemcpyHostToDevice),40031);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_bottom_crust_mantle,h_ibelm_bottom_crust_mantle,sizeof(int)*(mp->nspec2D_bottom_crust_mantle),cudaMemcpyHostToDevice),40031);
+
   // wavefield
   int size = NDIM * mp->NGLOB_CRUST_MANTLE;
 
@@ -1640,12 +1660,20 @@
                                          realw* h_gammax, realw* h_gammay, realw* h_gammaz,
                                          realw* h_rho, realw* h_kappav,
                                          realw* h_rmass,
+					 realw* h_normal_top_outer_core,
+					 realw* h_normal_bottom_outer_core,
+					 realw* h_jacobian2D_top_outer_core,
+					 realw* h_jacobian2D_bottom_outer_core,
+					 int* h_ibelm_top_outer_core,
+					 int* h_ibelm_bottom_outer_core,
                                          int* h_ibool,
                                          realw* h_xstore, realw* h_ystore, realw* h_zstore,
                                          int* num_phase_ispec,
                                          int* phase_ispec_inner,
                                          int* nspec_outer,
-                                         int* nspec_inner
+                                         int* nspec_inner,
+					 int* NSPEC2D_TOP_OC, 
+					 int* NSPEC2D_BOTTOM_OC
                                          ) {
 
   TRACE("prepare_outer_core_device");
@@ -1730,6 +1758,27 @@
   mp->nspec_outer_outer_core = *nspec_outer;
   mp->nspec_inner_outer_core = *nspec_inner;
 
+  // CMB/ICB coupling
+  mp->nspec2D_top_outer_core = *NSPEC2D_TOP_OC;
+  mp->nspec2D_bottom_outer_core = *NSPEC2D_BOTTOM_OC;
+  int size_toc = NGLL2*(mp->nspec2D_top_outer_core);
+  int size_boc = NGLL2*(mp->nspec2D_bottom_outer_core);
+
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_normal_top_outer_core),sizeof(realw)*NDIM*size_toc),40020);
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_normal_bottom_outer_core),sizeof(realw)*NDIM*size_boc),40021);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_top_outer_core,h_normal_top_outer_core,sizeof(realw)*NDIM*size_toc,cudaMemcpyHostToDevice),40030);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_bottom_outer_core,h_normal_bottom_outer_core,sizeof(realw)*NDIM*size_boc,cudaMemcpyHostToDevice),40031);
+
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_jacobian2D_top_outer_core),sizeof(realw)*size_toc),40022);
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_jacobian2D_bottom_outer_core),sizeof(realw)*size_boc),40023);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_top_outer_core,h_jacobian2D_top_outer_core,sizeof(realw)*size_toc,cudaMemcpyHostToDevice),40032);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_bottom_outer_core,h_jacobian2D_bottom_outer_core,sizeof(realw)*size_boc,cudaMemcpyHostToDevice),40033);
+
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ibelm_top_outer_core),sizeof(int)*(mp->nspec2D_top_outer_core)),40024);
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ibelm_bottom_outer_core),sizeof(int)*(mp->nspec2D_bottom_outer_core)),40025);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_top_outer_core,h_ibelm_top_outer_core,sizeof(int)*(mp->nspec2D_top_outer_core),cudaMemcpyHostToDevice),40034);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_bottom_outer_core,h_ibelm_bottom_outer_core,sizeof(int)*(mp->nspec2D_bottom_outer_core),cudaMemcpyHostToDevice),40035);
+
   // wavefield
   int size = mp->NGLOB_OUTER_CORE;
   print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_displ_outer_core),sizeof(realw)*size),4001);
@@ -1778,20 +1827,22 @@
 extern "C"
 void FC_FUNC_(prepare_inner_core_device,
               PREPARE_INNER_CORE_DEVICE)(long* Mesh_pointer_f,
-                                           realw* h_xix, realw* h_xiy, realw* h_xiz,
-                                           realw* h_etax, realw* h_etay, realw* h_etaz,
-                                           realw* h_gammax, realw* h_gammay, realw* h_gammaz,
-                                           realw* h_rho, realw* h_kappav, realw* h_muv,
-                                           realw* h_rmass,
-                                           int* h_ibool,
-                                           realw* h_xstore, realw* h_ystore, realw* h_zstore,
-                                           realw *c11store,realw *c12store,realw *c13store,
-                                           realw *c33store,realw *c44store,
-                                           int* h_idoubling_inner_core,
-                                           int* num_phase_ispec,
-                                           int* phase_ispec_inner,
-                                           int* nspec_outer,
-                                           int* nspec_inner) {
+					 realw* h_xix, realw* h_xiy, realw* h_xiz,
+					 realw* h_etax, realw* h_etay, realw* h_etaz,
+					 realw* h_gammax, realw* h_gammay, realw* h_gammaz,
+					 realw* h_rho, realw* h_kappav, realw* h_muv,
+					 realw* h_rmass,
+					 int* h_ibelm_top_inner_core,
+					 int* h_ibool,
+					 realw* h_xstore, realw* h_ystore, realw* h_zstore,
+					 realw *c11store,realw *c12store,realw *c13store,
+					 realw *c33store,realw *c44store,
+					 int* h_idoubling_inner_core,
+					 int* num_phase_ispec,
+					 int* phase_ispec_inner,
+					 int* nspec_outer,
+					 int* nspec_inner,
+					 int* NSPEC2D_TOP_IC) {
 
   TRACE("prepare_inner_core_device");
 
@@ -1919,8 +1970,11 @@
 
   mp->nspec_outer_inner_core = *nspec_outer;
   mp->nspec_inner_inner_core = *nspec_inner;
+  mp->nspec2D_top_inner_core = *NSPEC2D_TOP_IC;
 
-
+  print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ibelm_top_inner_core),sizeof(int)*(mp->nspec2D_top_inner_core)),40021);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_top_inner_core,h_ibelm_top_inner_core,sizeof(int)*(mp->nspec2D_top_inner_core),cudaMemcpyHostToDevice),40031);
+ 
   // wavefield
   int size = NDIM * mp->NGLOB_INNER_CORE;
   print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_displ_inner_core),sizeof(realw)*size),4001);
@@ -2638,6 +2692,10 @@
   }
   cudaFree(mp->d_phase_ispec_inner_crust_mantle);
 
+  cudaFree(mp->d_normal_top_crust_mantle);
+  cudaFree(mp->d_ibelm_top_crust_mantle);
+  cudaFree(mp->d_ibelm_bottom_crust_mantle);
+
   cudaFree(mp->d_displ_crust_mantle);
   cudaFree(mp->d_veloc_crust_mantle);
   cudaFree(mp->d_accel_crust_mantle);
@@ -2681,6 +2739,15 @@
   cudaFree(mp->d_ibool_outer_core);
   cudaFree(mp->d_phase_ispec_inner_outer_core);
 
+  cudaFree(mp->d_ibelm_top_outer_core);
+  cudaFree(mp->d_ibelm_bottom_outer_core);
+
+  cudaFree(mp->d_normal_top_outer_core);
+  cudaFree(mp->d_normal_bottom_outer_core);
+
+  cudaFree(mp->d_jacobian2D_top_outer_core);
+  cudaFree(mp->d_jacobian2D_bottom_outer_core);
+
   cudaFree(mp->d_displ_outer_core);
   cudaFree(mp->d_veloc_outer_core);
   cudaFree(mp->d_accel_outer_core);
@@ -2716,6 +2783,8 @@
     cudaFree(mp->d_zstore_inner_core);
   }
 
+  cudaFree(mp->d_ibelm_top_inner_core);
+
   if( ! mp->anisotropic_inner_core ){
     cudaFree(mp->d_kappavstore_inner_core);
   }else{

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c	2012-02-27 16:39:04 UTC (rev 19686)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c	2012-02-27 19:29:48 UTC (rev 19687)
@@ -153,20 +153,64 @@
 // src/cuda/compute_coupling_cuda.cu
 //
 
-void FC_FUNC_(compute_coupling_ac_el_cuda,
-              COMPUTE_COUPLING_AC_EL_CUDA)(
-                                            long* Mesh_pointer_f,
-                                            int* phase_is_innerf,
-                                            int* num_coupling_ac_el_facesf,
-                                            int* SIMULATION_TYPEf) {} 
+void FC_FUNC_(compute_coupling_fluid_cmb_cuda,
+              COMPUTE_COUPLING_FLUID_CMB_CUDA)(
+					       long* Mesh_pointer_f,
+					       int* ibool_crust_mantle,
+					       int* ibelm_bottom_crust_mantle,
+					       realw* normal_top_outer_core,
+					       realw* jacobian2D_top_outer_core,
+					       realw* wgllwgll_xy,
+					       int* ibool_outer_core,
+					       int* ibelm_top_outer_core,
+					       int* SIMULATION_TYPE,
+					       int* NSPEC_TOP) {} 
 
-void FC_FUNC_(compute_coupling_el_ac_cuda,
-              COMPUTE_COUPLING_EL_AC_CUDA)(
-                                                 long* Mesh_pointer_f,
-                                                 int* phase_is_innerf,
-                                                 int* num_coupling_ac_el_facesf,
-                                                 int* SIMULATION_TYPEf) {} 
+void FC_FUNC_(compute_coupling_fluid_icb_cuda,
+              COMPUTE_COUPLING_FLUID_ICB_CUDA)(
+					       long* Mesh_pointer_f,
+					       int* ibool_inner_core,
+					       int* ibelm_top_inner_core,
+					       realw* normal_bottom_outer_core,
+					       realw* jacobian2D_bottom_outer_core,
+					       realw* wgllwgll_xy,
+					       int* ibool_outer_core,
+					       int* ibelm_bottom_outer_core,
+					       int* SIMULATION_TYPE,
+					       int* NSPEC_BOTTOM) {} 
 
+void FC_FUNC_(compute_coupling_cmb_fluid_cuda,
+              COMPUTE_COUPLING_CMB_FLUID_CUDA)(
+					       long* Mesh_pointer_f,
+					       int* ibool_crust_mantle,
+					       int* ibelm_bottom_crust_mantle,
+					       realw* normal_top_outer_core,
+					       realw* jacobian2D_top_outer_core,
+					       realw* wgllwgll_xy,
+					       int* ibool_outer_core,
+					       int* ibelm_top_outer_core,
+					       double* RHO_TOP_OC,
+					       realw* minus_g_cmb,
+					       int* GRAVITY_VAL,
+					       int* SIMULATION_TYPE,
+					       int* NSPEC_BOTTOM) {} 
+
+void FC_FUNC_(compute_coupling_icb_fluid_cuda,
+              COMPUTE_COUPLING_ICB_fluid_CUDA)(
+					       long* Mesh_pointer_f,
+					       int* ibool_inner_core,
+					       int* ibelm_top_inner_core,
+					       realw* normal_bottom_outer_core,
+					       realw* jacobian2D_bottom_outer_core,
+					       realw* wgllwgll_xy,
+					       int* ibool_outer_core,
+					       int* ibelm_bottom_outer_core,
+					       double* RHO_BOTTOM_OC,
+					       realw* minus_g_icb,
+					       int* GRAVITY_VAL,
+					       int* SIMULATION_TYPE,
+					       int* NSPEC_TOP) {} 
+
 void FC_FUNC_(compute_coupling_ocean_cuda,
               COMPUTE_COUPLING_OCEAN_CUDA)(long* Mesh_pointer_f,
                                        int* SIMULATION_TYPE) {} 
@@ -662,8 +706,48 @@
               TRANSFER_B_FIELDS_OC_FROM_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,
                                                 long* Mesh_pointer_f) {} 
 
+void FC_FUNC_(transfer_coupling_fields_fluid_cmb_icb_to_device,
+              TRANSFER_COUPLING_FIELDS_FLUID_CMB_ICB_TO_DEVICE)(int* size_oc, int* size_cm, int* size_cm, 
+								realw* displ_cm, realw* displ_ic, realw* accel,
+								long* Mesh_pointer_f) {} 
+
+void FC_FUNC_(transfer_coupling_b_fields_fluid_cmb_icb_to_device,
+              TRANSFER_COUPLING_B_FIELDS_FLUID_CMB_ICB_TO_DEVICE)(int* size_oc, int* size_cm, int* size_ic,
+								  realw* b_displ_cm, realw* b_displ_ic, realw* b_accel,
+								  long* Mesh_pointer_f) {} 
+
+void FC_FUNC_(transfer_coupling_fields_fluid_cmb_icb_from_device,
+              TRANSFER_COUPLING_FIELDS_FLUID_CMB_ICB_FROM_DEVICE)(int* size_oc, int* size_cm, int* size_ic, 
+								  realw* displ_cm, realw* displ_ic, realw* accel,
+								  long* Mesh_pointer_f) {} 
+
+void FC_FUNC_(transfer_coupling_b_fields_fluid_cmb_icb_from_device,
+              TRANSFER_COUPLING_B_FIELDS_FLUID_CMB_ICB_FROM_DEVICE)(int* size_oc, int* size_cm, int* size_ic, 
+								    realw* b_displ_cm, realw* b_displ_ic, realw* b_accel,
+								    long* Mesh_pointer_f) {} 
+
+void FC_FUNC_(transfer_coupling_fields_cmb_icb_fluid_to_device,
+	      TRANSFER_COUPLING_FIELDS_CMB_ICB_FLUID_TO_DEVICE)(int* size_oc, int* size_cm, int* size_ic, 
+								realw* displ_cm, realw* displ_ic, realw* accel_cm, 
+								realw* accel_ic, realw* accel_oc, long* Mesh_pointer_f) {} 
+
+void FC_FUNC_(transfer_coupling_b_fields_cmb_icb_fluid_to_device,
+	      TRANSFER_COUPLING_B_FIELDS_CMB_ICB_FLUID_TO_DEVICE)(int* size_oc, int* size_cm, int* size_ic,
+								  realw* b_displ_cm, realw* b_displ_ic, realw* b_accel_cm, 
+								  realw* b_accel_ic, realw* b_accel_oc, long* Mesh_pointer_f) {} 
+
+void FC_FUNC_(transfer_coupling_fields_cmb_icb_fluid_from_device,
+	      TRANSFER_COUPLING_FIELDS_CMB_ICB_FLUID_FROM_DEVICE)(int* size_oc, int* size_cm, int* size_ic,
+								  realw* displ_cm, realw* displ_ic, realw* accel_cm, 
+								  realw* accel_ic, realw* accel_oc, long* Mesh_pointer_f) {} 
+
+void FC_FUNC_(transfer_coupling_b_fields_cmb_icb_fluid_from_device,
+	      TRANSFER_COUPLING_B_FIELDS_CMB_ICB_FLUID_FROM_DEVICE)(int* size_oc, int* size_cm, int* size_ic,
+								    realw* b_displ_cm, realw* b_displ_ic, realw* b_accel_cm, 
+								    realw* b_accel_ic, realw* b_accel_oc, long* Mesh_pointer_f) {} 
+
 void FC_FUNC_(transfer_accel_cm_to_device,
-              TRNASFER_ACCEL_CM_TO_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {} 
+              TRANSFER_ACCEL_CM_TO_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {} 
 
 void FC_FUNC_(transfer_displ_cm_from_device,
               TRANSFER_DISPL_CM_FROM_DEVICE)(int* size, realw* displ, long* Mesh_pointer_f) {} 
@@ -690,7 +774,7 @@
               TRANSFER_ACCEL_CM_FROM_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {} 
 
 void FC_FUNC_(transfer_b_accel_cm_from_device,
-              TRNASFER_B_ACCEL_CM_FROM_DEVICE)(int* size, realw* b_accel,long* Mesh_pointer_f) {} 
+              TRANSFER_B_ACCEL_CM_FROM_DEVICE)(int* size, realw* b_accel,long* Mesh_pointer_f) {} 
 
 void FC_FUNC_(transfer_accel_ic_from_device,
               TRANSFER_ACCEL_IC_FROM_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {} 
@@ -765,9 +849,57 @@
 
 void FC_FUNC_(transfer_kernels_noise_to_host,
               TRANSFER_KERNELS_NOISE_TO_HOST)(long* Mesh_pointer,
-                                              realw* h_Sigma_kl,
-                                              int* NSPEC) {} 
+					      realw* h_Sigma_kl,
+					      int* NSPEC_AB) {} 
 
+void FC_FUNC_(transfer_fields_ac_to_device,
+              TRANSFER_FIELDS_AC_TO_DEVICE)(
+					    int* size,
+					    realw* potential_acoustic,
+					    realw* potential_dot_acoustic,
+					    realw* potential_dot_dot_acoustic,
+					    long* Mesh_pointer_f) {} 
+
+void FC_FUNC_(transfer_b_fields_ac_to_device,
+              TRANSFER_B_FIELDS_AC_TO_DEVICE)(
+					      int* size,
+					      realw* b_potential_acoustic,
+					      realw* b_potential_dot_acoustic,
+					      realw* b_potential_dot_dot_acoustic,
+					      long* Mesh_pointer_f) {} 
+
+void FC_FUNC_(transfer_fields_ac_from_device,TRANSFER_FIELDS_AC_FROM_DEVICE)(
+									     int* size,
+									     realw* potential_acoustic,
+									     realw* potential_dot_acoustic,
+									     realw* potential_dot_dot_acoustic,
+									     long* Mesh_pointer_f) {} 
+
+void FC_FUNC_(transfer_b_fields_ac_from_device,
+              TRANSFER_B_FIELDS_AC_FROM_DEVICE)(
+						int* size,
+						realw* b_potential_acoustic,
+						realw* b_potential_dot_acoustic,
+						realw* b_potential_dot_dot_acoustic,
+						long* Mesh_pointer_f) {} 
+
+void FC_FUNC_(transfer_dot_dot_from_device,
+              TRANSFER_DOT_DOT_FROM_DEVICE)(int* size, realw* potential_dot_dot_acoustic,long* Mesh_pointer_f) {} 
+
+void FC_FUNC_(transfer_b_dot_dot_from_device,
+              TRANSFER_B_DOT_DOT_FROM_DEVICE)(int* size, realw* b_potential_dot_dot_acoustic,long* Mesh_pointer_f) {} 
+
+void FC_FUNC_(transfer_kernels_ac_to_host,
+              TRANSFER_KERNELS_AC_TO_HOST)(long* Mesh_pointer,
+					   realw* h_rho_ac_kl,
+					   realw* h_kappa_ac_kl,
+					   int* NSPEC_AB) {} 
+
+void FC_FUNC_(transfer_kernels_hess_el_tohost,
+              TRANSFER_KERNELS_HESS_EL_TOHOST)(long* Mesh_pointer,
+					       realw* h_hess_kl,
+					       int* NSPEC_AB) {} 
+
 void FC_FUNC_(transfer_kernels_hess_cm_tohost,
               TRANSFER_KERNELS_HESS_CM_TOHOST)(long* Mesh_pointer,
                                               realw* h_hess_kl,

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/transfer_fields_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/transfer_fields_cuda.cu	2012-02-27 16:39:04 UTC (rev 19686)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/transfer_fields_cuda.cu	2012-02-27 19:29:48 UTC (rev 19687)
@@ -56,7 +56,7 @@
 void FC_FUNC_(transfer_fields_cm_to_device,
               TRANSFER_FIELDS_CM_TO_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {
 
-TRACE("transfer_fields_cm_to_device_");
+TRACE("transfer_fields_cm_to_device");
 
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
 
@@ -71,7 +71,7 @@
 void FC_FUNC_(transfer_fields_ic_to_device,
               TRANSFER_FIELDS_IC_TO_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {
 
-  TRACE("transfer_fields_ic_to_device_");
+  TRACE("transfer_fields_ic_to_device");
 
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
 
@@ -86,7 +86,7 @@
 void FC_FUNC_(transfer_fields_oc_to_device,
               TRANSFER_FIELDS_OC_TO_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {
 
-  TRACE("transfer_fields_oc_to_device_");
+  TRACE("transfer_fields_oc_to_device");
 
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
 
@@ -96,6 +96,57 @@
 
 }
 
+// coupling fluid_CMB_ICB
+extern "C"
+void FC_FUNC_(transfer_coupling_fields_fluid_cmb_icb_to_device,
+              TRANSFER_COUPLING_FIELDS_FLUID_CMB_ICB_TO_DEVICE)(int* size_oc, int* size_cm, int* size_ic, realw* displ_cm, realw* displ_ic, realw* accel, long* Mesh_pointer_f) {
+
+  TRACE("transfer_coupling_fields_fluid_cmb_icb_to_device");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_accel_outer_core,accel,sizeof(realw)*(*size_oc),cudaMemcpyHostToDevice),40009);
+
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_displ_crust_mantle,displ_cm,sizeof(realw)*(*size_cm),cudaMemcpyHostToDevice),40010);
+
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_displ_inner_core,displ_ic,sizeof(realw)*(*size_ic),cudaMemcpyHostToDevice),40011);
+
+}
+
+// coupling CMB_ICB_fluid
+extern "C"
+void FC_FUNC_(transfer_coupling_fields_cmb_icb_fluid_to_device,
+              TRANSFER_COUPLING_FIELDS_CMB_ICB_FLUID_TO_DEVICE)(int* size_oc, int* size_cm, int* size_ic, realw* displ_cm, realw* displ_ic, realw* accel_cm, realw* accel_ic, realw* accel_oc, long* Mesh_pointer_f) {
+
+  TRACE("transfer_coupling_fields_cmb_icb_fluid_to_device");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_accel_outer_core,accel_oc,sizeof(realw)*(*size_oc),cudaMemcpyHostToDevice),40009);
+
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_accel_crust_mantle,accel_cm,sizeof(realw)*(*size_cm),cudaMemcpyHostToDevice),40010);
+
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_accel_inner_core,accel_ic,sizeof(realw)*(*size_ic),cudaMemcpyHostToDevice),40010);
+
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_displ_crust_mantle,displ_cm,sizeof(realw)*(*size_cm),cudaMemcpyHostToDevice),40012);
+
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_displ_inner_core,displ_ic,sizeof(realw)*(*size_ic),cudaMemcpyHostToDevice),40013);
+
+}
+
+// coupling CMB_ocean
+extern "C"
+void FC_FUNC_(transfer_coupling_fields_cmb_ocean_to_device,
+	      TRANSFER_COUPLING_FIELDS_CMB_OCEAN_TO_DEVICE)(int* size, realw* accel, long* Mesh_pointer_f) {
+
+  TRACE("transfer_coupling_fields_cmb_ocean_to_device");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_accel_crust_mantle,accel,sizeof(realw)*(*size),cudaMemcpyHostToDevice),40009);
+
+}
+
 /* ----------------------------------------------------------------------------------------------- */
 
 // backward/reconstructed fields
@@ -106,12 +157,12 @@
               TRANSFER_FIELDS_B_CM_TO_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,
                                               long* Mesh_pointer_f) {
 
-  TRACE("transfer_fields_b_cm_to_device_");
+  TRACE("transfer_fields_b_cm_to_device");
 
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-  cudaMemcpy(mp->d_b_displ_crust_mantle,b_displ,sizeof(realw)*(*size),cudaMemcpyHostToDevice);
-  cudaMemcpy(mp->d_b_veloc_crust_mantle,b_veloc,sizeof(realw)*(*size),cudaMemcpyHostToDevice);
-  cudaMemcpy(mp->d_b_accel_crust_mantle,b_accel,sizeof(realw)*(*size),cudaMemcpyHostToDevice);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_displ_crust_mantle,b_displ,sizeof(realw)*(*size),cudaMemcpyHostToDevice),40003);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_veloc_crust_mantle,b_veloc,sizeof(realw)*(*size),cudaMemcpyHostToDevice),40004);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_accel_crust_mantle,b_accel,sizeof(realw)*(*size),cudaMemcpyHostToDevice),40005);
 
 }
 
@@ -121,12 +172,12 @@
               TRANSFER_FIELDS_B_IC_TO_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,
                                               long* Mesh_pointer_f) {
 
-  TRACE("transfer_fields_b_ic_to_device_");
+  TRACE("transfer_fields_b_ic_to_device");
 
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-  cudaMemcpy(mp->d_b_displ_inner_core,b_displ,sizeof(realw)*(*size),cudaMemcpyHostToDevice);
-  cudaMemcpy(mp->d_b_veloc_inner_core,b_veloc,sizeof(realw)*(*size),cudaMemcpyHostToDevice);
-  cudaMemcpy(mp->d_b_accel_inner_core,b_accel,sizeof(realw)*(*size),cudaMemcpyHostToDevice);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_displ_inner_core,b_displ,sizeof(realw)*(*size),cudaMemcpyHostToDevice),40003);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_veloc_inner_core,b_veloc,sizeof(realw)*(*size),cudaMemcpyHostToDevice),40004);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_accel_inner_core,b_accel,sizeof(realw)*(*size),cudaMemcpyHostToDevice),40005);
 
 }
 
@@ -136,17 +187,65 @@
               TRANSFER_FIELDS_B_OC_TO_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,
                                               long* Mesh_pointer_f) {
 
-  TRACE("transfer_fields_b_oc_to_device_");
+  TRACE("transfer_fields_b_oc_to_device");
 
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-  cudaMemcpy(mp->d_b_displ_outer_core,b_displ,sizeof(realw)*(*size),cudaMemcpyHostToDevice);
-  cudaMemcpy(mp->d_b_veloc_outer_core,b_veloc,sizeof(realw)*(*size),cudaMemcpyHostToDevice);
-  cudaMemcpy(mp->d_b_accel_outer_core,b_accel,sizeof(realw)*(*size),cudaMemcpyHostToDevice);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_displ_outer_core,b_displ,sizeof(realw)*(*size),cudaMemcpyHostToDevice),40003);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_veloc_outer_core,b_veloc,sizeof(realw)*(*size),cudaMemcpyHostToDevice),40004);
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_accel_outer_core,b_accel,sizeof(realw)*(*size),cudaMemcpyHostToDevice),40005);
 
 }
 
+// coupling fluid_CMB_ICB
+extern "C"
+void FC_FUNC_(transfer_coupling_b_fields_fluid_cmb_icb_to_device,
+              TRANSFER_COUPLING_B_FIELDS_FLUID_CMB_ICB_TO_DEVICE)(int* size_oc, int* size_cm, int* size_ic, realw* b_displ_cm, realw* b_displ_ic, realw* b_accel, long* Mesh_pointer_f) {
 
+  TRACE("transfer_coupling_b_fields_fluid_cmb_icb_to_device");
 
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_accel_outer_core,b_accel,sizeof(realw)*(*size_oc),cudaMemcpyHostToDevice),40009);
+
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_displ_crust_mantle,b_displ_cm,sizeof(realw)*(*size_cm),cudaMemcpyHostToDevice),40010);
+
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_displ_inner_core,b_displ_ic,sizeof(realw)*(*size_ic),cudaMemcpyHostToDevice),40011);
+
+}
+
+// coupling CMB_ICB_fluid
+extern "C"
+void FC_FUNC_(transfer_coupling_b_fields_cmb_icb_fluid_to_device,
+              TRANSFER_COUPLING_B_FIELDS_CMB_ICB_FLUID_TO_DEVICE)(int* size_oc, int* size_cm, int* size_ic, realw* b_displ_cm, realw* b_displ_ic, realw* b_accel_cm, realw* b_accel_ic, realw* b_accel_oc, long* Mesh_pointer_f) {
+
+  TRACE("transfer_coupling_b_fields_cmb_icb_fluid_to_device");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_accel_outer_core,b_accel_oc,sizeof(realw)*(*size_oc),cudaMemcpyHostToDevice),40009);
+
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_accel_crust_mantle,b_accel_cm,sizeof(realw)*(*size_cm),cudaMemcpyHostToDevice),40010);
+
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_accel_inner_core,b_accel_ic,sizeof(realw)*(*size_ic),cudaMemcpyHostToDevice),40011);
+
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_displ_crust_mantle,b_displ_cm,sizeof(realw)*(*size_cm),cudaMemcpyHostToDevice),40012);
+
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_displ_inner_core,b_displ_ic,sizeof(realw)*(*size_ic),cudaMemcpyHostToDevice),40013);
+}
+
+// coupling CMB_ocean
+extern "C"
+void FC_FUNC_(transfer_coupling_b_fields_cmb_ocean_to_device,
+	      TRANSFER_COUPLING_B_FIELDS_CMB_OCEAN_TO_DEVICE)(int* size, realw* b_accel, long* Mesh_pointer_f) {
+
+  TRACE("transfer_coupling_b_fields_cmb_ocean_to_device");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+  print_CUDA_error_if_any(cudaMemcpy(mp->d_b_accel_crust_mantle,b_accel,sizeof(realw)*(*size),cudaMemcpyHostToDevice),40009);
+
+}
+
 /* ----------------------------------------------------------------------------------------------- */
 
 // transfer memory from GPU device to CPU host
@@ -158,7 +257,7 @@
 void FC_FUNC_(transfer_fields_cm_from_device,
               TRANSFER_FIELDS_CM_FROM_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {
 
-  TRACE("transfer_fields_cm_from_device_");
+  TRACE("transfer_fields_cm_from_device");
 
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
 
@@ -175,7 +274,7 @@
 void FC_FUNC_(transfer_fields_ic_from_device,
               TRANSFER_FIELDS_IC_FROM_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {
 
-  TRACE("transfer_fields_ic_from_device_");
+  TRACE("transfer_fields_ic_from_device");
 
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
 
@@ -192,7 +291,7 @@
 void FC_FUNC_(transfer_fields_oc_from_device,
               TRANSFER_FIELDS_OC_FROM_DEVICE)(int* size, realw* displ, realw* veloc, realw* accel,long* Mesh_pointer_f) {
 
-  TRACE("transfer_fields_oc_from_device_");
+  TRACE("transfer_fields_oc_from_device");
 
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
 
@@ -202,23 +301,74 @@
 
 }
 
+// coupling fluid_CMB_IMB
+extern "C"
+void FC_FUNC_(transfer_coupling_fields_fluid_cmb_icb_from_device,
+              TRANSFER_COUPLING_FIELDS_FLUID_CMB_ICB_FROM_DEVICE)(int* size_oc, int* size_cm, int* size_ic, realw* displ_cm, realw* displ_ic, realw* accel, long* Mesh_pointer_f) {
 
+  TRACE("transfer_coupling_fields_fluid_cmb_icb_from_device");
 
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+  print_CUDA_error_if_any(cudaMemcpy(accel,mp->d_accel_outer_core,sizeof(realw)*(*size_oc),cudaMemcpyDeviceToHost),40014);
+
+  print_CUDA_error_if_any(cudaMemcpy(displ_cm,mp->d_displ_crust_mantle,sizeof(realw)*(*size_cm),cudaMemcpyDeviceToHost),40015);
+
+  print_CUDA_error_if_any(cudaMemcpy(displ_ic,mp->d_displ_inner_core,sizeof(realw)*(*size_ic),cudaMemcpyDeviceToHost),40016);
+
+}
+
+// coupling CMB_ICB_fluid
+extern "C"
+void FC_FUNC_(transfer_coupling_fields_cmb_icb_fluid_from_device,
+              TRANSFER_COUPLING_FIELDS_CMB_ICB_FLUID_FROM_DEVICE)(int* size_oc, int* size_cm, int* size_ic, realw* displ_cm, realw* displ_ic, realw* accel_cm, realw* accel_ic, realw* accel_oc, long* Mesh_pointer_f) {
+
+  TRACE("transfer_coupling_fields_cmb_icb_fluid_from_device");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+  print_CUDA_error_if_any(cudaMemcpy(accel_oc,mp->d_accel_outer_core,sizeof(realw)*(*size_oc),cudaMemcpyDeviceToHost),40014);
+
+  print_CUDA_error_if_any(cudaMemcpy(accel_cm,mp->d_accel_crust_mantle,sizeof(realw)*(*size_cm),cudaMemcpyDeviceToHost),40015);
+
+  print_CUDA_error_if_any(cudaMemcpy(accel_ic,mp->d_accel_inner_core,sizeof(realw)*(*size_ic),cudaMemcpyDeviceToHost),40016);
+
+  print_CUDA_error_if_any(cudaMemcpy(displ_cm,mp->d_displ_crust_mantle,sizeof(realw)*(*size_cm),cudaMemcpyDeviceToHost),40017);
+
+  print_CUDA_error_if_any(cudaMemcpy(displ_ic,mp->d_displ_inner_core,sizeof(realw)*(*size_ic),cudaMemcpyDeviceToHost),40018);
+
+}
+
+// coupling CMB_ocean
+extern "C"
+void FC_FUNC_(transfer_coupling_fields_cmb_ocean_from_device,
+	      TRANSFER_COUPLING_FIELDS_CMB_OCEAN_FROM_DEVICE)(int* size, realw* accel, long* Mesh_pointer_f) {
+
+  TRACE("transfer_coupling_fields_cmb_ocean_from_device");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+  print_CUDA_error_if_any(cudaMemcpy(accel,mp->d_accel_crust_mantle,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40014);
+
+}
+
 /* ----------------------------------------------------------------------------------------------- */
 
+// backward/reconstructed fields
+
 // crust_mantle
 extern "C"
 void FC_FUNC_(transfer_b_fields_cm_from_device,
               TRANSFER_B_FIELDS_CM_FROM_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,
                                                 long* Mesh_pointer_f) {
 
-TRACE("transfer_b_fields_cm_from_device_");
+TRACE("transfer_b_fields_cm_from_device");
 
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
 
-  cudaMemcpy(b_displ,mp->d_b_displ_crust_mantle,sizeof(realw)*(*size),cudaMemcpyDeviceToHost);
-  cudaMemcpy(b_veloc,mp->d_b_veloc_crust_mantle,sizeof(realw)*(*size),cudaMemcpyDeviceToHost);
-  cudaMemcpy(b_accel,mp->d_b_accel_crust_mantle,sizeof(realw)*(*size),cudaMemcpyDeviceToHost);
+  print_CUDA_error_if_any(cudaMemcpy(b_displ,mp->d_b_displ_crust_mantle,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40006);
+  print_CUDA_error_if_any(cudaMemcpy(b_veloc,mp->d_b_veloc_crust_mantle,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40007);
+  print_CUDA_error_if_any(cudaMemcpy(b_accel,mp->d_b_accel_crust_mantle,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40008);
 
 }
 
@@ -229,9 +379,8 @@
 void FC_FUNC_(transfer_b_fields_ic_from_device,
               TRANSFER_B_FIELDS_IC_FROM_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,
                                                 long* Mesh_pointer_f) {
-
-  TRACE("transfer_fields_b_ic_from_device_");
-
+  TRACE("transfer_fields_b_ic_from_device");
+  
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
 
   print_CUDA_error_if_any(cudaMemcpy(b_displ,mp->d_b_displ_inner_core,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40006);
@@ -247,9 +396,9 @@
 void FC_FUNC_(transfer_b_fields_oc_from_device,
               TRANSFER_B_FIELDS_OC_FROM_DEVICE)(int* size, realw* b_displ, realw* b_veloc, realw* b_accel,
                                                 long* Mesh_pointer_f) {
-
-  TRACE("transfer_b_fields_oc_from_device_");
-
+  
+  TRACE("transfer_b_fields_oc_from_device");
+  
   Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
 
   print_CUDA_error_if_any(cudaMemcpy(b_displ,mp->d_b_displ_outer_core,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40006);
@@ -258,12 +407,60 @@
 
 }
 
+// coupling fluid_CMB_ICB
+extern "C"
+void FC_FUNC_(transfer_coupling_b_fields_fluid_cmb_icb_from_device,
+              TRANSFER_COUPLING_B_FIELDS_FLUID_CMB_icb_FROM_DEVICE)(int* size_oc, int* size_cm, int* size_ic, realw* b_displ_cm, realw* b_displ_ic, realw* b_accel, long* Mesh_pointer_f) {
 
+  TRACE("transfer_coupling_b_fields_fluid_cmb_icb_from_device");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+  print_CUDA_error_if_any(cudaMemcpy(b_accel,mp->d_b_accel_outer_core,sizeof(realw)*(*size_oc),cudaMemcpyDeviceToHost),40014);
+
+  print_CUDA_error_if_any(cudaMemcpy(b_displ_cm,mp->d_b_displ_crust_mantle,sizeof(realw)*(*size_cm),cudaMemcpyDeviceToHost),40015);
+
+  print_CUDA_error_if_any(cudaMemcpy(b_displ_ic,mp->d_b_displ_inner_core,sizeof(realw)*(*size_ic),cudaMemcpyDeviceToHost),40016);
+}
+
+// coupling CMB_fluid
+extern "C"
+void FC_FUNC_(transfer_coupling_b_fields_cmb_icb_fluid_from_device,
+              TRANSFER_COUPLING_B_FIELDS_CMB_ICB_FLUID_FROM_DEVICE)(int* size_oc, int* size_cm, int* size_ic, realw* b_displ_cm, realw* b_displ_ic, realw* b_accel_cm, realw* b_accel_ic, realw* b_accel_oc, long* Mesh_pointer_f) {
+
+  TRACE("transfer_coupling_b_fields_cmb_icb_fluid_from_device");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+  print_CUDA_error_if_any(cudaMemcpy(b_accel_oc,mp->d_b_accel_outer_core,sizeof(realw)*(*size_oc),cudaMemcpyDeviceToHost),40014);
+
+  print_CUDA_error_if_any(cudaMemcpy(b_accel_cm,mp->d_b_accel_crust_mantle,sizeof(realw)*(*size_cm),cudaMemcpyDeviceToHost),40015);
+
+  print_CUDA_error_if_any(cudaMemcpy(b_accel_ic,mp->d_b_accel_inner_core,sizeof(realw)*(*size_ic),cudaMemcpyDeviceToHost),40016);
+
+  print_CUDA_error_if_any(cudaMemcpy(b_displ_cm,mp->d_b_displ_crust_mantle,sizeof(realw)*(*size_cm),cudaMemcpyDeviceToHost),40017);
+
+  print_CUDA_error_if_any(cudaMemcpy(b_displ_ic,mp->d_b_displ_inner_core,sizeof(realw)*(*size_ic),cudaMemcpyDeviceToHost),40018);
+}
+
+// coupling CMB_ocean
+extern "C"
+void FC_FUNC_(transfer_coupling_b_fields_cmb_ocean_from_device,
+	      TRANSFER_COUPLING_B_FIELDS_CMB_OCEAN_FROM_DEVICE)(int* size, realw* b_accel, long* Mesh_pointer_f) {
+
+  TRACE("transfer_coupling_b_fields_cmb_ocean_from_device");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+  print_CUDA_error_if_any(cudaMemcpy(b_accel,mp->d_b_accel_crust_mantle,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40014);
+
+}
+
 /* ----------------------------------------------------------------------------------------------- */
 
 extern "C"
 void FC_FUNC_(transfer_accel_cm_to_device,
-              TRNASFER_ACCEL_CM_TO_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {
+              TRANSFER_ACCEL_CM_TO_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {
 
 TRACE("transfer_accel_cm_to_device");
 

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_coupling.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_coupling.f90	2012-02-27 16:39:04 UTC (rev 19686)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_coupling.f90	2012-02-27 19:29:48 UTC (rev 19687)
@@ -208,7 +208,6 @@
 
           displ_n = displ_x*nx + displ_y*ny + displ_z*nz
 
-
           ! update fluid acceleration/pressure
           iglob_oc = ibool_outer_core(i,j,k,ispec)
           b_accel_outer_core(iglob_oc) = b_accel_outer_core(iglob_oc) - weight*displ_n

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_acoustic.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_acoustic.F90	2012-02-27 16:39:04 UTC (rev 19686)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_acoustic.F90	2012-02-27 19:29:48 UTC (rev 19687)
@@ -121,43 +121,62 @@
 
     ! computes additional contributions to acceleration field
     if( iphase == 1 ) then
-      ! Stacey absorbing boundaries
-      if(NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS) call compute_stacey_outer_core()
+       <<<<<<< HEAD
+       ! Stacey absorbing boundaries
+       if(NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS) call compute_stacey_outer_core()
 
-      ! ****************************************************
-      ! **********  add matching with solid part  **********
-      ! ****************************************************
-      ! only for elements in first matching layer in the fluid
-      !---
-      !--- couple with mantle at the top of the outer core
-      !---
-      call load_CPU_acoustic()
-      call load_CPU_elastic()
+       ! ****************************************************
+       ! **********  add matching with solid part  **********
+       ! ****************************************************
+       ! only for elements in first matching layer in the fluid
+       if( .not. GPU_MODE ) then
+          ! on CPU    
+          !---
+          !--- couple with mantle at the top of the outer core
+          !---
+          call load_CPU_acoustic()
+          call load_CPU_elastic()
 
-      if(ACTUALLY_COUPLE_FLUID_CMB) &
-        call compute_coupling_fluid_CMB(displ_crust_mantle,b_displ_crust_mantle, &
-                              ibool_crust_mantle,ibelm_bottom_crust_mantle,  &
-                              accel_outer_core,b_accel_outer_core, &
-                              normal_top_outer_core,jacobian2D_top_outer_core, &
-                              wgllwgll_xy,ibool_outer_core,ibelm_top_outer_core, &
-                              SIMULATION_TYPE,NSPEC2D_TOP(IREGION_OUTER_CORE))
+          if(ACTUALLY_COUPLE_FLUID_CMB) &
+               call compute_coupling_fluid_CMB(displ_crust_mantle,b_displ_crust_mantle, &
+               ibool_crust_mantle,ibelm_bottom_crust_mantle,  &
+               accel_outer_core,b_accel_outer_core, &
+               normal_top_outer_core,jacobian2D_top_outer_core, &
+               wgllwgll_xy,ibool_outer_core,ibelm_top_outer_core, &
+               SIMULATION_TYPE,NSPEC2D_TOP(IREGION_OUTER_CORE))
 
-      !---
-      !--- couple with inner core at the bottom of the outer core
-      !---
-      if(ACTUALLY_COUPLE_FLUID_ICB) &
-        call compute_coupling_fluid_ICB(displ_inner_core,b_displ_inner_core, &
-                              ibool_inner_core,ibelm_top_inner_core,  &
-                              accel_outer_core,b_accel_outer_core, &
-                              normal_bottom_outer_core,jacobian2D_bottom_outer_core, &
-                              wgllwgll_xy,ibool_outer_core,ibelm_bottom_outer_core, &
-                              SIMULATION_TYPE,NSPEC2D_BOTTOM(IREGION_OUTER_CORE))
+          !---
+          !--- couple with inner core at the bottom of the outer core
+          !---
+          if(ACTUALLY_COUPLE_FLUID_ICB) &
+               call compute_coupling_fluid_ICB(displ_inner_core,b_displ_inner_core, &
+               ibool_inner_core,ibelm_top_inner_core,  &
+               accel_outer_core,b_accel_outer_core, &
+               normal_bottom_outer_core,jacobian2D_bottom_outer_core, &
+               wgllwgll_xy,ibool_outer_core,ibelm_bottom_outer_core, &
+               SIMULATION_TYPE,NSPEC2D_BOTTOM(IREGION_OUTER_CORE))
 
-      call load_GPU_acoustic()
+          call load_GPU_acoustic()
 
-    endif
+       else
+          ! on GPU
+          call load_GPU_acoustic_coupling_fluid()
 
+          !---
+          !--- couple with mantle at the top of the outer core
+          !---
+          if( ACTUALLY_COUPLE_FLUID_CMB ) &
+               call compute_coupling_fluid_cmb_cuda(Mesh_pointer)
+          !---
+          !--- couple with inner core at the bottom of the outer core
+          !---
+          if( ACTUALLY_COUPLE_FLUID_ICB ) &
+               call compute_coupling_fluid_icb_cuda(Mesh_pointer)
 
+          call load_CPU_acoustic_coupling_fluid()
+       endif
+    endif ! iphase == 1
+
     ! assemble all the contributions between slices using MPI
     ! in outer core
     if( iphase == 1 ) then
@@ -362,3 +381,50 @@
   endif
 
   end subroutine
+
+!=====================================================================
+
+  subroutine load_GPU_acoustic_coupling_fluid
+  
+  use specfem_par
+  use specfem_par_outercore,only: accel_outer_core,b_accel_outer_core
+  use specfem_par_innercore,only: displ_inner_core,b_displ_inner_core
+  use specfem_par_crustmantle,only: displ_crust_mantle,b_displ_crust_mantle
+  implicit none
+  
+  ! daniel: TODO - temporary transfers to the GPU
+  call transfer_coupling_fields_fluid_cmb_icb_to_device( &
+       NGLOB_OUTER_CORE,NDIM*NGLOB_CRUST_MANTLE,NDIM*NGLOB_INNER_CORE, &
+       displ_crust_mantle,displ_inner_core,accel_outer_core,Mesh_pointer)
+
+  if( SIMULATION_TYPE == 3 ) then
+     call transfer_coupling_b_fields_fluid_cmb_icb_to_device( &
+          NGLOB_OUTER_CORE_ADJOINT,NDIM*NGLOB_CRUST_MANTLE_ADJOINT,NDIM*NGLOB_INNER_CORE_ADJOINT, &
+          b_displ_crust_mantle,b_displ_inner_core,b_accel_outer_core,Mesh_pointer)  
+  endif
+    
+  end subroutine 
+
+!=====================================================================
+
+  subroutine load_CPU_acoustic_coupling_fluid
+  
+  use specfem_par
+  use specfem_par_outercore,only: accel_outer_core,b_accel_outer_core
+  use specfem_par_innercore,only: displ_inner_core,b_displ_inner_core
+  use specfem_par_crustmantle,only: displ_crust_mantle,b_displ_crust_mantle
+  implicit none
+  
+  ! daniel: TODO - temporary transfers to the CPU
+  call transfer_coupling_fields_fluid_cmb_icb_from_device( &
+       NGLOB_OUTER_CORE,NDIM*NGLOB_CRUST_MANTLE,NDIM*NGLOB_INNER_CORE, &
+       displ_crust_mantle,displ_inner_core,accel_outer_core,Mesh_pointer)
+
+  if( SIMULATION_TYPE == 3 ) then
+     call transfer_coupling_b_fields_fluid_cmb_icb_from_device( &
+          NGLOB_OUTER_CORE_ADJOINT,NDIM*NGLOB_CRUST_MANTLE_ADJOINT,NDIM*NGLOB_INNER_CORE_ADJOINT, &
+          b_displ_crust_mantle,b_displ_inner_core,b_accel_outer_core,Mesh_pointer)  
+  endif
+    
+  end subroutine
+

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90	2012-02-27 16:39:04 UTC (rev 19686)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90	2012-02-27 19:29:48 UTC (rev 19687)
@@ -197,88 +197,106 @@
     ! computes additional contributions to acceleration field
     if( iphase == 1 ) then
 
-      ! absorbing boundaries
-      ! Stacey
-      if(NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS) call compute_stacey_crust_mantle()
+       ! absorbing boundaries
+       ! Stacey
+       if(NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS) call compute_stacey_crust_mantle()
 
-      ! add the sources
-      if (SIMULATION_TYPE == 1 .and. NOISE_TOMOGRAPHY == 0 .and. nsources_local > 0) &
-        call compute_add_sources()
+       ! add the sources
+       if (SIMULATION_TYPE == 1 .and. NOISE_TOMOGRAPHY == 0 .and. nsources_local > 0) &
+            call compute_add_sources()
 
-      ! add adjoint sources
-      if (SIMULATION_TYPE == 2 .or. SIMULATION_TYPE == 3) then
-        if( nadj_rec_local > 0 ) call compute_add_sources_adjoint()
-      endif
+       ! add adjoint sources
+       if (SIMULATION_TYPE == 2 .or. SIMULATION_TYPE == 3) then
+          if( nadj_rec_local > 0 ) call compute_add_sources_adjoint()
+       endif
 
-      ! add sources for backward/reconstructed wavefield
-      if (SIMULATION_TYPE == 3 .and. NOISE_TOMOGRAPHY == 0 .and. nsources_local > 0) &
-        call compute_add_sources_backward()
+       ! add sources for backward/reconstructed wavefield
+       if (SIMULATION_TYPE == 3 .and. NOISE_TOMOGRAPHY == 0 .and. nsources_local > 0) &
+            call compute_add_sources_backward()
 
-      ! NOISE_TOMOGRAPHY
-      select case( NOISE_TOMOGRAPHY )
-      case( 1 )
-        ! the first step of noise tomography is to use |S(\omega)|^2 as a point force source at one of the receivers.
-        ! hence, instead of a moment tensor 'sourcearrays', a 'noise_sourcearray' for a point force is needed.
-        ! furthermore, the CMTSOLUTION needs to be zero, i.e., no earthquakes.
-        ! now this must be manually set in DATA/CMTSOLUTION, by USERS.
-        call noise_add_source_master_rec()
-      case( 2 )
-        ! second step of noise tomography, i.e., read the surface movie saved at every timestep
-        ! use the movie to drive the ensemble forward wavefield
-        call noise_read_add_surface_movie(accel_crust_mantle,NSTEP-it+1)
-        ! be careful, since ensemble forward sources are reversals of generating wavefield "eta"
-        ! hence the "NSTEP-it+1", i.e., start reading from the last timestep
-        ! note the ensemble forward sources are generally distributed on the surface of the earth
-        ! that's to say, the ensemble forward source is kind of a surface force density, not a body force density
-        ! therefore, we must add it here, before applying the inverse of mass matrix
-      case( 3 )
-        ! third step of noise tomography, i.e., read the surface movie saved at every timestep
-        ! use the movie to reconstruct the ensemble forward wavefield
-        ! the ensemble adjoint wavefield is done as usual
-        ! note instead of "NSTEP-it+1", now we us "it", since reconstruction is a reversal of reversal
-        call noise_read_add_surface_movie(b_accel_crust_mantle,it)
-      end select
+       ! NOISE_TOMOGRAPHY
+       select case( NOISE_TOMOGRAPHY )
+       case( 1 )
+          ! the first step of noise tomography is to use |S(\omega)|^2 as a point force source at one of the receivers.
+          ! hence, instead of a moment tensor 'sourcearrays', a 'noise_sourcearray' for a point force is needed.
+          ! furthermore, the CMTSOLUTION needs to be zero, i.e., no earthquakes.
+          ! now this must be manually set in DATA/CMTSOLUTION, by USERS.
+          call noise_add_source_master_rec()
+       case( 2 )
+          ! second step of noise tomography, i.e., read the surface movie saved at every timestep
+          ! use the movie to drive the ensemble forward wavefield
+          call noise_read_add_surface_movie(accel_crust_mantle,NSTEP-it+1)
+          ! be careful, since ensemble forward sources are reversals of generating wavefield "eta"
+          ! hence the "NSTEP-it+1", i.e., start reading from the last timestep
+          ! note the ensemble forward sources are generally distributed on the surface of the earth
+          ! that's to say, the ensemble forward source is kind of a surface force density, not a body force density
+          ! therefore, we must add it here, before applying the inverse of mass matrix
+       case( 3 )
+          ! third step of noise tomography, i.e., read the surface movie saved at every timestep
+          ! use the movie to reconstruct the ensemble forward wavefield
+          ! the ensemble adjoint wavefield is done as usual
+          ! note instead of "NSTEP-it+1", now we us "it", since reconstruction is a reversal of reversal
+          call noise_read_add_surface_movie(b_accel_crust_mantle,it)
+       end select
 
 
-      ! ****************************************************
-      ! **********  add matching with fluid part  **********
-      ! ****************************************************
-      call load_CPU_elastic()
-      call load_CPU_acoustic()
+       ! ****************************************************
+       ! **********  add matching with fluid part  **********
+       ! ****************************************************
+       ! only for elements in first matching layer in the solid
+       if( .not. GPU_MODE ) then
+          ! on CPU   
+          call load_CPU_elastic()
+          call load_CPU_acoustic()
+          !---
+          !--- couple with outer core at the bottom of the mantle
+          !---
+          if(ACTUALLY_COUPLE_FLUID_CMB) &
+               call compute_coupling_CMB_fluid(displ_crust_mantle,b_displ_crust_mantle, &
+               accel_crust_mantle,b_accel_crust_mantle, &
+               ibool_crust_mantle,ibelm_bottom_crust_mantle,  &
+               accel_outer_core,b_accel_outer_core, &
+               normal_top_outer_core,jacobian2D_top_outer_core, &
+               wgllwgll_xy,ibool_outer_core,ibelm_top_outer_core, &
+               RHO_TOP_OC,minus_g_cmb, &
+               SIMULATION_TYPE,NSPEC2D_BOTTOM(IREGION_CRUST_MANTLE))
 
-      ! only for elements in first matching layer in the solid
+          !---
+          !--- couple with outer core at the top of the inner core
+          !---
+          if(ACTUALLY_COUPLE_FLUID_ICB) &
+               call compute_coupling_ICB_fluid(displ_inner_core,b_displ_inner_core, &
+               accel_inner_core,b_accel_inner_core, &
+               ibool_inner_core,ibelm_top_inner_core,  &
+               accel_outer_core,b_accel_outer_core, &
+               normal_bottom_outer_core,jacobian2D_bottom_outer_core, &
+               wgllwgll_xy,ibool_outer_core,ibelm_bottom_outer_core, &
+               RHO_BOTTOM_OC,minus_g_icb, &
+               SIMULATION_TYPE,NSPEC2D_TOP(IREGION_INNER_CORE))
 
-      !---
-      !--- couple with outer core at the bottom of the mantle
-      !---
-      if(ACTUALLY_COUPLE_FLUID_CMB) &
-        call compute_coupling_CMB_fluid(displ_crust_mantle,b_displ_crust_mantle, &
-                              accel_crust_mantle,b_accel_crust_mantle, &
-                              ibool_crust_mantle,ibelm_bottom_crust_mantle,  &
-                              accel_outer_core,b_accel_outer_core, &
-                              normal_top_outer_core,jacobian2D_top_outer_core, &
-                              wgllwgll_xy,ibool_outer_core,ibelm_top_outer_core, &
-                              RHO_TOP_OC,minus_g_cmb, &
-                              SIMULATION_TYPE,NSPEC2D_BOTTOM(IREGION_CRUST_MANTLE))
+          call load_GPU_elastic()
 
-      !---
-      !--- couple with outer core at the top of the inner core
-      !---
-      if(ACTUALLY_COUPLE_FLUID_ICB) &
-        call compute_coupling_ICB_fluid(displ_inner_core,b_displ_inner_core, &
-                              accel_inner_core,b_accel_inner_core, &
-                              ibool_inner_core,ibelm_top_inner_core,  &
-                              accel_outer_core,b_accel_outer_core, &
-                              normal_bottom_outer_core,jacobian2D_bottom_outer_core, &
-                              wgllwgll_xy,ibool_outer_core,ibelm_bottom_outer_core, &
-                              RHO_BOTTOM_OC,minus_g_icb, &
-                              SIMULATION_TYPE,NSPEC2D_TOP(IREGION_INNER_CORE))
+       else
+          ! on GPU
+          call load_GPU_elastic_coupling_fluid()
 
-      call load_GPU_elastic()
+          !---
+          !--- couple with outer core at the bottom of the mantle
+          !---
+          if( ACTUALLY_COUPLE_FLUID_CMB ) &
+               call compute_coupling_cmb_fluid_cuda(Mesh_pointer, &
+               RHO_TOP_OC,minus_g_cmb,GRAVITY_VAL)
+          !---
+          !--- couple with outer core at the top of the inner core
+          !---
+          if( ACTUALLY_COUPLE_FLUID_ICB ) &
+               call compute_coupling_icb_fluid_cuda(Mesh_pointer, &
+               RHO_BOTTOM_OC,minus_g_icb,GRAVITY_VAL)
 
+          call load_CPU_elastic_coupling_fluid()
+       endif
     endif ! iphase == 1
 
-
     ! assemble all the contributions between slices using MPI
 
     ! crust/mantle and inner core handled in the same call
@@ -468,6 +486,7 @@
   ! couples ocean with crust mantle
   ! (updates acceleration with ocean load approximation)
   if(OCEANS_VAL) then
+<<<<<<< HEAD
     call load_CPU_elastic()
 
     call compute_coupling_ocean(accel_crust_mantle,b_accel_crust_mantle, &
@@ -476,6 +495,24 @@
                           updated_dof_ocean_load, &
                           SIMULATION_TYPE,NSPEC2D_TOP(IREGION_CRUST_MANTLE))
     call load_GPU_elastic()
+=======
+     if(.NOT. GPU_MODE) then
+        ! on CPU
+        call compute_coupling_ocean(accel_crust_mantle,b_accel_crust_mantle, &
+             rmass_crust_mantle,rmass_ocean_load,normal_top_crust_mantle, &
+             ibool_crust_mantle,ibelm_top_crust_mantle, &
+             updated_dof_ocean_load, &
+             SIMULATION_TYPE,NSPEC2D_TOP(IREGION_CRUST_MANTLE))
+        
+     else
+        ! on GPU
+        call load_GPU_elastic_coupling_ocean()
+
+        call compute_coupling_ocean_cuda(Mesh_pointer)
+        
+        call load_CPU_elastic_coupling_ocean()
+     endif
+>>>>>>> adds cuda code for coupling inner_core/outer_core, outer_core/crust_mantle and crust_mantle/ocean
   endif
 
   ! Newmark time scheme:
@@ -733,3 +770,95 @@
   endif
 
   end subroutine
+
+!=====================================================================
+
+  subroutine load_GPU_elastic_coupling_fluid
+  
+  use specfem_par
+  use specfem_par_outercore,only: accel_outer_core,b_accel_outer_core
+  use specfem_par_innercore,only: displ_inner_core,b_displ_inner_core, &
+       accel_inner_core,b_accel_inner_core
+  use specfem_par_crustmantle,only: displ_crust_mantle,b_displ_crust_mantle, &
+       accel_crust_mantle,b_accel_crust_mantle
+  implicit none
+  
+  ! daniel: TODO - temporary transfers to the GPU
+  call transfer_coupling_fields_cmb_icb_fluid_to_device( &
+       NGLOB_OUTER_CORE,NDIM*NGLOB_CRUST_MANTLE,NDIM*NGLOB_INNER_CORE, &
+       displ_crust_mantle,displ_inner_core,accel_crust_mantle,accel_inner_core, &
+       accel_outer_core,Mesh_pointer)
+
+  if( SIMULATION_TYPE == 3 ) then
+     call transfer_coupling_b_fields_cmb_icb_fluid_to_device( &
+          NGLOB_OUTER_CORE_ADJOINT,NDIM*NGLOB_CRUST_MANTLE_ADJOINT,NDIM*NGLOB_INNER_CORE_ADJOINT, &
+          b_displ_crust_mantle,b_displ_inner_core,b_accel_crust_mantle,b_accel_inner_core, &
+          b_accel_outer_core,Mesh_pointer)  
+  endif
+    
+  end subroutine 
+
+!=====================================================================
+
+  subroutine load_CPU_elastic_coupling_fluid
+  
+  use specfem_par
+  use specfem_par_outercore,only: accel_outer_core,b_accel_outer_core
+  use specfem_par_innercore,only: displ_inner_core,b_displ_inner_core, &
+       accel_inner_core,b_accel_inner_core
+  use specfem_par_crustmantle,only: displ_crust_mantle,b_displ_crust_mantle, &
+       accel_crust_mantle,b_accel_crust_mantle
+  implicit none
+  
+  ! daniel: TODO - temporary transfers to the CPU
+  call transfer_coupling_fields_cmb_icb_fluid_from_device( &
+       NGLOB_OUTER_CORE,NDIM*NGLOB_CRUST_MANTLE,NDIM*NGLOB_INNER_CORE, &
+       displ_crust_mantle,displ_inner_core,accel_crust_mantle,accel_inner_core, &
+       accel_outer_core,Mesh_pointer)
+
+  if( SIMULATION_TYPE == 3 ) then
+     call transfer_coupling_b_fields_cmb_icb_fluid_from_device( &
+          NGLOB_OUTER_CORE_ADJOINT,NDIM*NGLOB_CRUST_MANTLE_ADJOINT,NDIM*NGLOB_INNER_CORE_ADJOINT, &
+          b_displ_crust_mantle,b_displ_inner_core,b_accel_crust_mantle,b_accel_inner_core, &
+          b_accel_outer_core,Mesh_pointer)  
+  endif
+    
+  end subroutine
+
+!=====================================================================
+
+  subroutine load_GPU_elastic_coupling_ocean
+  
+  use specfem_par
+  use specfem_par_crustmantle,only: accel_crust_mantle,b_accel_crust_mantle
+  implicit none
+  
+  ! daniel: TODO - temporary transfers to the GPU
+  call transfer_coupling_fields_cmb_ocean_to_device( &
+       NDIM*NGLOB_CRUST_MANTLE,accel_crust_mantle,Mesh_pointer)
+
+  if( SIMULATION_TYPE == 3 ) then
+     call transfer_coupling_b_fields_cmb_ocean_to_device( &
+          NDIM*NGLOB_CRUST_MANTLE_ADJOINT,b_accel_crust_mantle,Mesh_pointer)  
+  endif
+    
+  end subroutine 
+
+!=====================================================================
+
+  subroutine load_CPU_elastic_coupling_ocean
+  
+  use specfem_par
+  use specfem_par_crustmantle,only: accel_crust_mantle,b_accel_crust_mantle
+  implicit none
+  
+  ! daniel: TODO - temporary transfers to the GPU
+  call transfer_coupling_fields_cmb_ocean_from_device( &
+       NDIM*NGLOB_CRUST_MANTLE,accel_crust_mantle,Mesh_pointer)
+
+  if( SIMULATION_TYPE == 3 ) then
+     call transfer_coupling_b_fields_cmb_ocean_from_device( &
+          NDIM*NGLOB_CRUST_MANTLE_ADJOINT,b_accel_crust_mantle,Mesh_pointer)  
+  endif
+    
+  end subroutine 

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90	2012-02-27 16:39:04 UTC (rev 19686)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90	2012-02-27 19:29:48 UTC (rev 19687)
@@ -1070,6 +1070,7 @@
                                   number_receiver_global,islice_selected_rec,ispec_selected_rec, &
                                   nrec, nrec_local, nadj_rec_local, &
                                   NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE, &
+                                  NGLOB_CRUST_MANTLE_OCEANS, &
                                   NSPEC_OUTER_CORE,NGLOB_OUTER_CORE, &
                                   NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
                                   SIMULATION_TYPE,NOISE_TOMOGRAPHY, &
@@ -1244,6 +1245,9 @@
                                   kappahstore_crust_mantle,muhstore_crust_mantle, &
                                   eta_anisostore_crust_mantle, &
                                   rmass_crust_mantle, &
+                                  normal_top_crust_mantle, &
+                                  ibelm_top_crust_mantle, &
+                                  ibelm_bottom_crust_mantle, &
                                   ibool_crust_mantle, &
                                   xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
                                   ispec_is_tiso_crust_mantle, &
@@ -1255,7 +1259,9 @@
                                   c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
                                   c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
                                   num_phase_ispec_crust_mantle,phase_ispec_inner_crust_mantle, &
-                                  nspec_outer_crust_mantle,nspec_inner_crust_mantle)
+                                  nspec_outer_crust_mantle,nspec_inner_crust_mantle, &
+                                  NSPEC2D_TOP(IREGION_CRUST_MANTLE), &
+                                  NSPEC2D_BOTTOM(IREGION_CRUST_MANTLE))
   call sync_all()
 
 
@@ -1267,10 +1273,18 @@
                                   gammax_outer_core,gammay_outer_core,gammaz_outer_core, &
                                   rhostore_outer_core,kappavstore_outer_core, &
                                   rmass_outer_core, &
+                                  normal_top_outer_core, &
+                                  normal_bottom_outer_core, &
+                                  jacobian2D_top_outer_core, &
+                                  jacobian2D_bottom_outer_core, &
+                                  ibelm_top_outer_core, &
+                                  ibelm_bottom_outer_core, &
                                   ibool_outer_core, &
                                   xstore_outer_core,ystore_outer_core,zstore_outer_core, &
                                   num_phase_ispec_outer_core,phase_ispec_inner_outer_core, &
-                                  nspec_outer_outer_core,nspec_inner_outer_core)
+                                  nspec_outer_outer_core,nspec_inner_outer_core, &
+                                  NSPEC2D_TOP(IREGION_OUTER_CORE), &
+                                  NSPEC2D_BOTTOM(IREGION_OUTER_CORE))
   call sync_all()
 
 
@@ -1282,13 +1296,15 @@
                                   gammax_inner_core,gammay_inner_core,gammaz_inner_core, &
                                   rhostore_inner_core,kappavstore_inner_core,muvstore_inner_core, &
                                   rmass_inner_core, &
+                                  ibelm_top_inner_core, &
                                   ibool_inner_core, &
                                   xstore_inner_core,ystore_inner_core,zstore_inner_core, &
                                   c11store_inner_core,c12store_inner_core,c13store_inner_core, &
                                   c33store_inner_core,c44store_inner_core, &
                                   idoubling_inner_core, &
                                   num_phase_ispec_inner_core,phase_ispec_inner_inner_core, &
-                                  nspec_outer_inner_core,nspec_inner_inner_core)
+                                  nspec_outer_inner_core,nspec_inner_inner_core, &
+                                  NSPEC2D_TOP(IREGION_INNER_CORE))
   call sync_all()